From 7efb14256dd306407cf8388a46a4a6c5c5c85774 Mon Sep 17 00:00:00 2001 From: Mark-PK Tsai Date: Tue, 2 Nov 2021 22:15:35 +0800 Subject: [PATCH 001/493] remoteproc: Use %pe format string to print return error code Use %pe format string to print return error code which make the error message easier to understand. Signed-off-by: Mark-PK Tsai Link: https://lore.kernel.org/r/20211102141535.28372-1-mark-pk.tsai@mediatek.com Reviewed-by: Bjorn Andersson [Fixed capital letter in subject line] Signed-off-by: Mathieu Poirier --- drivers/remoteproc/remoteproc_core.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/remoteproc/remoteproc_core.c b/drivers/remoteproc/remoteproc_core.c index 775df165eb45..69f51acf235e 100644 --- a/drivers/remoteproc/remoteproc_core.c +++ b/drivers/remoteproc/remoteproc_core.c @@ -577,8 +577,8 @@ static int rproc_handle_vdev(struct rproc *rproc, void *ptr, dma_get_mask(rproc->dev.parent)); if (ret) { dev_warn(dev, - "Failed to set DMA mask %llx. Trying to continue... %x\n", - dma_get_mask(rproc->dev.parent), ret); + "Failed to set DMA mask %llx. Trying to continue... (%pe)\n", + dma_get_mask(rproc->dev.parent), ERR_PTR(ret)); } /* parse the vrings */ From 4da96175014be67c846fd274eace08066e525d75 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sat, 16 Oct 2021 08:44:28 +0200 Subject: [PATCH 002/493] remoteproc: imx_rproc: Fix a resource leak in the remove function 'priv->workqueue' is destroyed in the error handling path of the probe but not in the remove function. Add the missing call to release some resources. Cc: stable Fixes: 2df7062002d0 ("remoteproc: imx_proc: enable virtio/mailbox") Signed-off-by: Christophe JAILLET Reviewed-by: Peng Fan Tested-by: Peng Fan Link: https://lore.kernel.org/r/d28ca94a4031bd7297d47c2164e18885a5a6ec19.1634366546.git.christophe.jaillet@wanadoo.fr Signed-off-by: Mathieu Poirier --- drivers/remoteproc/imx_rproc.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/remoteproc/imx_rproc.c b/drivers/remoteproc/imx_rproc.c index ff8170dbbc3c..0a45bc0d3f73 100644 --- a/drivers/remoteproc/imx_rproc.c +++ b/drivers/remoteproc/imx_rproc.c @@ -804,6 +804,7 @@ static int imx_rproc_remove(struct platform_device *pdev) clk_disable_unprepare(priv->clk); rproc_del(rproc); imx_rproc_free_mbox(rproc); + destroy_workqueue(priv->workqueue); rproc_free(rproc); return 0; From 876e0b26ccd211ca92607d83c87cc1f097784c6d Mon Sep 17 00:00:00 2001 From: Peng Fan Date: Wed, 10 Nov 2021 11:21:01 +0800 Subject: [PATCH 003/493] remoteproc: coredump: Correct argument 2 type for memcpy_fromio Address the sparse check warning: >> drivers/remoteproc/remoteproc_coredump.c:169:53: sparse: warning: incorrect type in argument 2 (different address spaces) sparse: expected void const volatile [noderef] __iomem *src sparse: got void *[assigned] ptr Reported-by: kernel test robot Signed-off-by: Peng Fan Link: https://lore.kernel.org/r/20211110032101.517487-1-peng.fan@oss.nxp.com Signed-off-by: Mathieu Poirier --- drivers/remoteproc/remoteproc_coredump.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/remoteproc/remoteproc_coredump.c b/drivers/remoteproc/remoteproc_coredump.c index c892f433a323..4b093420d98a 100644 --- a/drivers/remoteproc/remoteproc_coredump.c +++ b/drivers/remoteproc/remoteproc_coredump.c @@ -166,7 +166,7 @@ static void rproc_copy_segment(struct rproc *rproc, void *dest, memset(dest, 0xff, size); } else { if (is_iomem) - memcpy_fromio(dest, ptr, size); + memcpy_fromio(dest, (void const __iomem *)ptr, size); else memcpy(dest, ptr, size); } From 60630924bb5af8751adcecc896e7763c3783ca89 Mon Sep 17 00:00:00 2001 From: Fabien Dessenne Date: Mon, 11 Oct 2021 15:58:36 +0200 Subject: [PATCH 004/493] hwspinlock: stm32: enable clock at probe Set the clock during probe and keep its control during suspend / resume operations. This fixes an issue when CONFIG_PM is not set and where the clock is never enabled. Make use of devm_ functions to simplify the code. Signed-off-by: Fabien Dessenne Signed-off-by: Bjorn Andersson Link: https://lore.kernel.org/r/20211011135836.1045437-1-fabien.dessenne@foss.st.com --- drivers/hwspinlock/stm32_hwspinlock.c | 62 +++++++++++++++++---------- 1 file changed, 39 insertions(+), 23 deletions(-) diff --git a/drivers/hwspinlock/stm32_hwspinlock.c b/drivers/hwspinlock/stm32_hwspinlock.c index 3ad0ce0da4d9..5bd11a7fab65 100644 --- a/drivers/hwspinlock/stm32_hwspinlock.c +++ b/drivers/hwspinlock/stm32_hwspinlock.c @@ -54,8 +54,23 @@ static const struct hwspinlock_ops stm32_hwspinlock_ops = { .relax = stm32_hwspinlock_relax, }; +static void stm32_hwspinlock_disable_clk(void *data) +{ + struct platform_device *pdev = data; + struct stm32_hwspinlock *hw = platform_get_drvdata(pdev); + struct device *dev = &pdev->dev; + + pm_runtime_get_sync(dev); + pm_runtime_disable(dev); + pm_runtime_set_suspended(dev); + pm_runtime_put_noidle(dev); + + clk_disable_unprepare(hw->clk); +} + static int stm32_hwspinlock_probe(struct platform_device *pdev) { + struct device *dev = &pdev->dev; struct stm32_hwspinlock *hw; void __iomem *io_base; size_t array_size; @@ -66,43 +81,45 @@ static int stm32_hwspinlock_probe(struct platform_device *pdev) return PTR_ERR(io_base); array_size = STM32_MUTEX_NUM_LOCKS * sizeof(struct hwspinlock); - hw = devm_kzalloc(&pdev->dev, sizeof(*hw) + array_size, GFP_KERNEL); + hw = devm_kzalloc(dev, sizeof(*hw) + array_size, GFP_KERNEL); if (!hw) return -ENOMEM; - hw->clk = devm_clk_get(&pdev->dev, "hsem"); + hw->clk = devm_clk_get(dev, "hsem"); if (IS_ERR(hw->clk)) return PTR_ERR(hw->clk); + ret = clk_prepare_enable(hw->clk); + if (ret) { + dev_err(dev, "Failed to prepare_enable clock\n"); + return ret; + } + + platform_set_drvdata(pdev, hw); + + pm_runtime_get_noresume(dev); + pm_runtime_set_active(dev); + pm_runtime_enable(dev); + pm_runtime_put(dev); + + ret = devm_add_action_or_reset(dev, stm32_hwspinlock_disable_clk, pdev); + if (ret) { + dev_err(dev, "Failed to register action\n"); + return ret; + } + for (i = 0; i < STM32_MUTEX_NUM_LOCKS; i++) hw->bank.lock[i].priv = io_base + i * sizeof(u32); - platform_set_drvdata(pdev, hw); - pm_runtime_enable(&pdev->dev); - - ret = hwspin_lock_register(&hw->bank, &pdev->dev, &stm32_hwspinlock_ops, - 0, STM32_MUTEX_NUM_LOCKS); + ret = devm_hwspin_lock_register(dev, &hw->bank, &stm32_hwspinlock_ops, + 0, STM32_MUTEX_NUM_LOCKS); if (ret) - pm_runtime_disable(&pdev->dev); + dev_err(dev, "Failed to register hwspinlock\n"); return ret; } -static int stm32_hwspinlock_remove(struct platform_device *pdev) -{ - struct stm32_hwspinlock *hw = platform_get_drvdata(pdev); - int ret; - - ret = hwspin_lock_unregister(&hw->bank); - if (ret) - dev_err(&pdev->dev, "%s failed: %d\n", __func__, ret); - - pm_runtime_disable(&pdev->dev); - - return 0; -} - static int __maybe_unused stm32_hwspinlock_runtime_suspend(struct device *dev) { struct stm32_hwspinlock *hw = dev_get_drvdata(dev); @@ -135,7 +152,6 @@ MODULE_DEVICE_TABLE(of, stm32_hwpinlock_ids); static struct platform_driver stm32_hwspinlock_driver = { .probe = stm32_hwspinlock_probe, - .remove = stm32_hwspinlock_remove, .driver = { .name = "stm32_hwspinlock", .of_match_table = stm32_hwpinlock_ids, From c572724406e3926502eccc930447f0affb604503 Mon Sep 17 00:00:00 2001 From: Arnaud Pouliquen Date: Mon, 8 Nov 2021 14:59:45 +0100 Subject: [PATCH 005/493] rpmsg: char: Add pr_fmt() to prefix messages Make all messages to be prefixed in a unified way. Add pr_fmt() to achieve this. Signed-off-by: Arnaud Pouliquen Link: https://lore.kernel.org/r/20211108135945.3364-1-arnaud.pouliquen@foss.st.com Signed-off-by: Mathieu Poirier --- drivers/rpmsg/rpmsg_char.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/rpmsg/rpmsg_char.c b/drivers/rpmsg/rpmsg_char.c index b5907b80727c..d6214cb66026 100644 --- a/drivers/rpmsg/rpmsg_char.c +++ b/drivers/rpmsg/rpmsg_char.c @@ -9,6 +9,9 @@ * Based on rpmsg performance statistics driver by Michal Simek, which in turn * was based on TI & Google OMX rpmsg driver. */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include #include #include @@ -550,7 +553,7 @@ static int rpmsg_chrdev_init(void) ret = alloc_chrdev_region(&rpmsg_major, 0, RPMSG_DEV_MAX, "rpmsg"); if (ret < 0) { - pr_err("rpmsg: failed to allocate char dev region\n"); + pr_err("failed to allocate char dev region\n"); return ret; } @@ -563,7 +566,7 @@ static int rpmsg_chrdev_init(void) ret = register_rpmsg_driver(&rpmsg_chrdev_driver); if (ret < 0) { - pr_err("rpmsgchr: failed to register rpmsg driver\n"); + pr_err("failed to register rpmsg driver\n"); class_destroy(rpmsg_class); unregister_chrdev_region(rpmsg_major, RPMSG_DEV_MAX); } From 631af6e0f41002dc63504efd2f6c3e1ab5f931c5 Mon Sep 17 00:00:00 2001 From: Arnaud Pouliquen Date: Mon, 8 Nov 2021 15:01:26 +0100 Subject: [PATCH 006/493] rpmsg: Fix documentation return formatting kernel documentation specification: "The return value, if any, should be described in a dedicated section named Return." Signed-off-by: Arnaud Pouliquen Link: https://lore.kernel.org/r/20211108140126.3530-1-arnaud.pouliquen@foss.st.com Signed-off-by: Mathieu Poirier --- drivers/rpmsg/qcom_glink_native.c | 2 +- drivers/rpmsg/qcom_smd.c | 2 +- drivers/rpmsg/rpmsg_core.c | 24 ++++++++++++------------ drivers/rpmsg/virtio_rpmsg_bus.c | 2 +- 4 files changed, 15 insertions(+), 15 deletions(-) diff --git a/drivers/rpmsg/qcom_glink_native.c b/drivers/rpmsg/qcom_glink_native.c index 3f377a795b33..1030cfa80e04 100644 --- a/drivers/rpmsg/qcom_glink_native.c +++ b/drivers/rpmsg/qcom_glink_native.c @@ -427,7 +427,7 @@ static void qcom_glink_handle_intent_req_ack(struct qcom_glink *glink, * Allocates a local channel id and sends a RPM_CMD_OPEN message to the remote. * Will return with refcount held, regardless of outcome. * - * Returns 0 on success, negative errno otherwise. + * Return: 0 on success, negative errno otherwise. */ static int qcom_glink_send_open_req(struct qcom_glink *glink, struct glink_channel *channel) diff --git a/drivers/rpmsg/qcom_smd.c b/drivers/rpmsg/qcom_smd.c index 8da1b5cb31b3..540e027f08c4 100644 --- a/drivers/rpmsg/qcom_smd.c +++ b/drivers/rpmsg/qcom_smd.c @@ -1467,7 +1467,7 @@ ATTRIBUTE_GROUPS(qcom_smd_edge); * @parent: parent device for the edge * @node: device_node describing the edge * - * Returns an edge reference, or negative ERR_PTR() on failure. + * Return: an edge reference, or negative ERR_PTR() on failure. */ struct qcom_smd_edge *qcom_smd_register_edge(struct device *parent, struct device_node *node) diff --git a/drivers/rpmsg/rpmsg_core.c b/drivers/rpmsg/rpmsg_core.c index d3eb60059ef1..f031b2b1b21c 100644 --- a/drivers/rpmsg/rpmsg_core.c +++ b/drivers/rpmsg/rpmsg_core.c @@ -26,7 +26,7 @@ * @rpdev: rpmsg device * @chinfo: channel_info to bind * - * Returns a pointer to the new rpmsg device on success, or NULL on error. + * Return: a pointer to the new rpmsg device on success, or NULL on error. */ struct rpmsg_device *rpmsg_create_channel(struct rpmsg_device *rpdev, struct rpmsg_channel_info *chinfo) @@ -48,7 +48,7 @@ EXPORT_SYMBOL(rpmsg_create_channel); * @rpdev: rpmsg device * @chinfo: channel_info to bind * - * Returns 0 on success or an appropriate error value. + * Return: 0 on success or an appropriate error value. */ int rpmsg_release_channel(struct rpmsg_device *rpdev, struct rpmsg_channel_info *chinfo) @@ -102,7 +102,7 @@ EXPORT_SYMBOL(rpmsg_release_channel); * dynamically assign them an available rpmsg address (drivers should have * a very good reason why not to always use RPMSG_ADDR_ANY here). * - * Returns a pointer to the endpoint on success, or NULL on error. + * Return: a pointer to the endpoint on success, or NULL on error. */ struct rpmsg_endpoint *rpmsg_create_ept(struct rpmsg_device *rpdev, rpmsg_rx_cb_t cb, void *priv, @@ -146,7 +146,7 @@ EXPORT_SYMBOL(rpmsg_destroy_ept); * * Can only be called from process context (for now). * - * Returns 0 on success and an appropriate error value on failure. + * Return: 0 on success and an appropriate error value on failure. */ int rpmsg_send(struct rpmsg_endpoint *ept, void *data, int len) { @@ -175,7 +175,7 @@ EXPORT_SYMBOL(rpmsg_send); * * Can only be called from process context (for now). * - * Returns 0 on success and an appropriate error value on failure. + * Return: 0 on success and an appropriate error value on failure. */ int rpmsg_sendto(struct rpmsg_endpoint *ept, void *data, int len, u32 dst) { @@ -206,7 +206,7 @@ EXPORT_SYMBOL(rpmsg_sendto); * * Can only be called from process context (for now). * - * Returns 0 on success and an appropriate error value on failure. + * Return: 0 on success and an appropriate error value on failure. */ int rpmsg_send_offchannel(struct rpmsg_endpoint *ept, u32 src, u32 dst, void *data, int len) @@ -235,7 +235,7 @@ EXPORT_SYMBOL(rpmsg_send_offchannel); * * Can only be called from process context (for now). * - * Returns 0 on success and an appropriate error value on failure. + * Return: 0 on success and an appropriate error value on failure. */ int rpmsg_trysend(struct rpmsg_endpoint *ept, void *data, int len) { @@ -263,7 +263,7 @@ EXPORT_SYMBOL(rpmsg_trysend); * * Can only be called from process context (for now). * - * Returns 0 on success and an appropriate error value on failure. + * Return: 0 on success and an appropriate error value on failure. */ int rpmsg_trysendto(struct rpmsg_endpoint *ept, void *data, int len, u32 dst) { @@ -282,7 +282,7 @@ EXPORT_SYMBOL(rpmsg_trysendto); * @filp: file for poll_wait() * @wait: poll_table for poll_wait() * - * Returns mask representing the current state of the endpoint's send buffers + * Return: mask representing the current state of the endpoint's send buffers */ __poll_t rpmsg_poll(struct rpmsg_endpoint *ept, struct file *filp, poll_table *wait) @@ -313,7 +313,7 @@ EXPORT_SYMBOL(rpmsg_poll); * * Can only be called from process context (for now). * - * Returns 0 on success and an appropriate error value on failure. + * Return: 0 on success and an appropriate error value on failure. */ int rpmsg_trysend_offchannel(struct rpmsg_endpoint *ept, u32 src, u32 dst, void *data, int len) @@ -623,7 +623,7 @@ EXPORT_SYMBOL(rpmsg_unregister_device); * @rpdrv: pointer to a struct rpmsg_driver * @owner: owning module/driver * - * Returns 0 on success, and an appropriate error value on failure. + * Return: 0 on success, and an appropriate error value on failure. */ int __register_rpmsg_driver(struct rpmsg_driver *rpdrv, struct module *owner) { @@ -637,7 +637,7 @@ EXPORT_SYMBOL(__register_rpmsg_driver); * unregister_rpmsg_driver() - unregister an rpmsg driver from the rpmsg bus * @rpdrv: pointer to a struct rpmsg_driver * - * Returns 0 on success, and an appropriate error value on failure. + * Return: 0 on success, and an appropriate error value on failure. */ void unregister_rpmsg_driver(struct rpmsg_driver *rpdrv) { diff --git a/drivers/rpmsg/virtio_rpmsg_bus.c b/drivers/rpmsg/virtio_rpmsg_bus.c index 9c112aa65040..c37451512835 100644 --- a/drivers/rpmsg/virtio_rpmsg_bus.c +++ b/drivers/rpmsg/virtio_rpmsg_bus.c @@ -547,7 +547,7 @@ static void rpmsg_downref_sleepers(struct virtproc_info *vrp) * should use the appropriate rpmsg_{try}send{to, _offchannel} API * (see include/linux/rpmsg.h). * - * Returns 0 on success and an appropriate error value on failure. + * Return: 0 on success and an appropriate error value on failure. */ static int rpmsg_send_offchannel_raw(struct rpmsg_device *rpdev, u32 src, u32 dst, From 365fceecd66e2f4c9b4c4f636b506079052562ea Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Thu, 18 Nov 2021 20:28:02 +0100 Subject: [PATCH 007/493] dmaengine: ti: edma: Use 'for_each_set_bit' when possible Use 'for_each_set_bit()' instead of hand wrinting it. It is much less version. Signed-off-by: Christophe JAILLET Acked-by: Peter Ujfalusi Link: https://lore.kernel.org/r/47a7415d3aff8dfb66780bd6f80b085db4503bf7.1637263609.git.christophe.jaillet@wanadoo.fr Signed-off-by: Vinod Koul --- drivers/dma/ti/edma.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/dma/ti/edma.c b/drivers/dma/ti/edma.c index 35d81bd857f1..08e47f44d325 100644 --- a/drivers/dma/ti/edma.c +++ b/drivers/dma/ti/edma.c @@ -1681,8 +1681,7 @@ static irqreturn_t dma_ccerr_handler(int irq, void *data) dev_dbg(ecc->dev, "EMR%d 0x%08x\n", j, val); emr = val; - for (i = find_next_bit(&emr, 32, 0); i < 32; - i = find_next_bit(&emr, 32, i + 1)) { + for_each_set_bit(i, &emr, 32) { int k = (j << 5) + i; /* Clear the corresponding EMR bits */ From 5d78abb6fbc974d601dd365b9ce39f320fb5ba79 Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Tue, 26 Oct 2021 14:36:02 -0700 Subject: [PATCH 008/493] dmaengine: idxd: rework descriptor free path on failure Refactor the completion function to allow skipping of descriptor freeing on the submission failure path. This completely removes descriptor freeing from the submit failure path and leave the responsibility to the caller. Reviewed-by: Kevin Tian Signed-off-by: Dave Jiang Link: https://lore.kernel.org/r/163528416222.3925689.12859769271667814762.stgit@djiang5-desk3.ch.intel.com Signed-off-by: Vinod Koul --- drivers/dma/idxd/dma.c | 10 ++++++++-- drivers/dma/idxd/idxd.h | 8 +------- drivers/dma/idxd/init.c | 9 +++------ drivers/dma/idxd/irq.c | 8 ++++---- drivers/dma/idxd/submit.c | 12 +++--------- 5 files changed, 19 insertions(+), 28 deletions(-) diff --git a/drivers/dma/idxd/dma.c b/drivers/dma/idxd/dma.c index c39e9483206a..1ea663215909 100644 --- a/drivers/dma/idxd/dma.c +++ b/drivers/dma/idxd/dma.c @@ -21,7 +21,8 @@ static inline struct idxd_wq *to_idxd_wq(struct dma_chan *c) } void idxd_dma_complete_txd(struct idxd_desc *desc, - enum idxd_complete_type comp_type) + enum idxd_complete_type comp_type, + bool free_desc) { struct dma_async_tx_descriptor *tx; struct dmaengine_result res; @@ -44,6 +45,9 @@ void idxd_dma_complete_txd(struct idxd_desc *desc, tx->callback = NULL; tx->callback_result = NULL; } + + if (free_desc) + idxd_free_desc(desc->wq, desc); } static void op_flag_setup(unsigned long flags, u32 *desc_flags) @@ -153,8 +157,10 @@ static dma_cookie_t idxd_dma_tx_submit(struct dma_async_tx_descriptor *tx) cookie = dma_cookie_assign(tx); rc = idxd_submit_desc(wq, desc); - if (rc < 0) + if (rc < 0) { + idxd_free_desc(wq, desc); return rc; + } return cookie; } diff --git a/drivers/dma/idxd/idxd.h b/drivers/dma/idxd/idxd.h index 0cf8d3145870..3d600f8ee90b 100644 --- a/drivers/dma/idxd/idxd.h +++ b/drivers/dma/idxd/idxd.h @@ -579,7 +579,7 @@ int idxd_register_dma_channel(struct idxd_wq *wq); void idxd_unregister_dma_channel(struct idxd_wq *wq); void idxd_parse_completion_status(u8 status, enum dmaengine_tx_result *res); void idxd_dma_complete_txd(struct idxd_desc *desc, - enum idxd_complete_type comp_type); + enum idxd_complete_type comp_type, bool free_desc); /* cdev */ int idxd_cdev_register(void); @@ -603,10 +603,4 @@ static inline void perfmon_init(void) {} static inline void perfmon_exit(void) {} #endif -static inline void complete_desc(struct idxd_desc *desc, enum idxd_complete_type reason) -{ - idxd_dma_complete_txd(desc, reason); - idxd_free_desc(desc->wq, desc); -} - #endif diff --git a/drivers/dma/idxd/init.c b/drivers/dma/idxd/init.c index 7bf03f371ce1..4373b48cdc91 100644 --- a/drivers/dma/idxd/init.c +++ b/drivers/dma/idxd/init.c @@ -717,10 +717,8 @@ static void idxd_flush_pending_llist(struct idxd_irq_entry *ie) if (!head) return; - llist_for_each_entry_safe(desc, itr, head, llnode) { - idxd_dma_complete_txd(desc, IDXD_COMPLETE_ABORT); - idxd_free_desc(desc->wq, desc); - } + llist_for_each_entry_safe(desc, itr, head, llnode) + idxd_dma_complete_txd(desc, IDXD_COMPLETE_ABORT, true); } static void idxd_flush_work_list(struct idxd_irq_entry *ie) @@ -729,8 +727,7 @@ static void idxd_flush_work_list(struct idxd_irq_entry *ie) list_for_each_entry_safe(desc, iter, &ie->work_list, list) { list_del(&desc->list); - idxd_dma_complete_txd(desc, IDXD_COMPLETE_ABORT); - idxd_free_desc(desc->wq, desc); + idxd_dma_complete_txd(desc, IDXD_COMPLETE_ABORT, true); } } diff --git a/drivers/dma/idxd/irq.c b/drivers/dma/idxd/irq.c index 17f2f8a31b63..d9c4fc22536d 100644 --- a/drivers/dma/idxd/irq.c +++ b/drivers/dma/idxd/irq.c @@ -195,11 +195,11 @@ static void irq_process_pending_llist(struct idxd_irq_entry *irq_entry) * and 0xff, which DSA_COMP_STATUS_MASK can mask out. */ if (unlikely(desc->completion->status == IDXD_COMP_DESC_ABORT)) { - complete_desc(desc, IDXD_COMPLETE_ABORT); + idxd_dma_complete_txd(desc, IDXD_COMPLETE_ABORT, true); continue; } - complete_desc(desc, IDXD_COMPLETE_NORMAL); + idxd_dma_complete_txd(desc, IDXD_COMPLETE_NORMAL, true); } else { spin_lock(&irq_entry->list_lock); list_add_tail(&desc->list, @@ -238,11 +238,11 @@ static void irq_process_work_list(struct idxd_irq_entry *irq_entry) * and 0xff, which DSA_COMP_STATUS_MASK can mask out. */ if (unlikely(desc->completion->status == IDXD_COMP_DESC_ABORT)) { - complete_desc(desc, IDXD_COMPLETE_ABORT); + idxd_dma_complete_txd(desc, IDXD_COMPLETE_ABORT, true); continue; } - complete_desc(desc, IDXD_COMPLETE_NORMAL); + idxd_dma_complete_txd(desc, IDXD_COMPLETE_NORMAL, true); } } diff --git a/drivers/dma/idxd/submit.c b/drivers/dma/idxd/submit.c index de76fb4abac2..ea11809dbb32 100644 --- a/drivers/dma/idxd/submit.c +++ b/drivers/dma/idxd/submit.c @@ -129,7 +129,7 @@ static void llist_abort_desc(struct idxd_wq *wq, struct idxd_irq_entry *ie, spin_unlock(&ie->list_lock); if (found) - complete_desc(found, IDXD_COMPLETE_ABORT); + idxd_dma_complete_txd(found, IDXD_COMPLETE_ABORT, false); } int idxd_submit_desc(struct idxd_wq *wq, struct idxd_desc *desc) @@ -139,15 +139,11 @@ int idxd_submit_desc(struct idxd_wq *wq, struct idxd_desc *desc) void __iomem *portal; int rc; - if (idxd->state != IDXD_DEV_ENABLED) { - idxd_free_desc(wq, desc); + if (idxd->state != IDXD_DEV_ENABLED) return -EIO; - } - if (!percpu_ref_tryget_live(&wq->wq_active)) { - idxd_free_desc(wq, desc); + if (!percpu_ref_tryget_live(&wq->wq_active)) return -ENXIO; - } portal = idxd_wq_portal_addr(wq); @@ -182,8 +178,6 @@ int idxd_submit_desc(struct idxd_wq *wq, struct idxd_desc *desc) /* abort operation frees the descriptor */ if (ie) llist_abort_desc(wq, ie, desc); - else - idxd_free_desc(wq, desc); return rc; } } From 8b67426e05584e956775f4b134596b56bc0d35e0 Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Tue, 26 Oct 2021 14:36:10 -0700 Subject: [PATCH 009/493] dmaengine: idxd: int handle management refactoring Attach int_handle to irq_entry. This removes the separate management of int handles and reduces the confusion of interating through int handles that is off by 1 count. Reviewed-by: Kevin Tian Signed-off-by: Dave Jiang Link: https://lore.kernel.org/r/163528417065.3925689.11505755433684476288.stgit@djiang5-desk3.ch.intel.com Signed-off-by: Vinod Koul --- drivers/dma/idxd/device.c | 8 ++++ drivers/dma/idxd/idxd.h | 10 ++++- drivers/dma/idxd/init.c | 86 ++++++++++++++++++++------------------- drivers/dma/idxd/submit.c | 6 +-- drivers/dma/idxd/sysfs.c | 1 - 5 files changed, 64 insertions(+), 47 deletions(-) diff --git a/drivers/dma/idxd/device.c b/drivers/dma/idxd/device.c index fab412349f7f..f381319615fd 100644 --- a/drivers/dma/idxd/device.c +++ b/drivers/dma/idxd/device.c @@ -1206,6 +1206,13 @@ int __drv_enable_wq(struct idxd_wq *wq) goto err; } + /* + * Device has 1 misc interrupt and N interrupts for descriptor completion. To + * assign WQ to interrupt, we will take the N+1 interrupt since vector 0 is + * for the misc interrupt. + */ + wq->ie = &idxd->irq_entries[wq->id + 1]; + rc = idxd_wq_enable(wq); if (rc < 0) { dev_dbg(dev, "wq %d enabling failed: %d\n", wq->id, rc); @@ -1256,6 +1263,7 @@ void __drv_disable_wq(struct idxd_wq *wq) idxd_wq_drain(wq); idxd_wq_reset(wq); + wq->ie = NULL; wq->client_count = 0; } diff --git a/drivers/dma/idxd/idxd.h b/drivers/dma/idxd/idxd.h index 3d600f8ee90b..355159d4ee68 100644 --- a/drivers/dma/idxd/idxd.h +++ b/drivers/dma/idxd/idxd.h @@ -10,6 +10,7 @@ #include #include #include +#include #include #include #include "registers.h" @@ -64,6 +65,7 @@ extern struct idxd_device_driver idxd_drv; extern struct idxd_device_driver idxd_dmaengine_drv; extern struct idxd_device_driver idxd_user_drv; +#define INVALID_INT_HANDLE -1 struct idxd_irq_entry { struct idxd_device *idxd; int id; @@ -75,6 +77,9 @@ struct idxd_irq_entry { * and irq thread processing error descriptor. */ spinlock_t list_lock; + int int_handle; + struct idxd_wq *wq; + ioasid_t pasid; }; struct idxd_group { @@ -171,6 +176,7 @@ struct idxd_wq { struct wait_queue_head err_queue; struct idxd_device *idxd; int id; + struct idxd_irq_entry *ie; enum idxd_wq_type type; struct idxd_group *group; int client_count; @@ -266,6 +272,8 @@ struct idxd_device { unsigned int pasid; int num_groups; + int irq_cnt; + bool request_int_handles; u32 msix_perm_offset; u32 wqcfg_offset; @@ -292,8 +300,6 @@ struct idxd_device { struct workqueue_struct *wq; struct work_struct work; - int *int_handles; - struct idxd_pmu *idxd_pmu; }; diff --git a/drivers/dma/idxd/init.c b/drivers/dma/idxd/init.c index 4373b48cdc91..2dbff722e207 100644 --- a/drivers/dma/idxd/init.c +++ b/drivers/dma/idxd/init.c @@ -81,6 +81,7 @@ static int idxd_setup_interrupts(struct idxd_device *idxd) dev_err(dev, "Not MSI-X interrupt capable.\n"); return -ENOSPC; } + idxd->irq_cnt = msixcnt; rc = pci_alloc_irq_vectors(pdev, msixcnt, msixcnt, PCI_IRQ_MSIX); if (rc != msixcnt) { @@ -103,7 +104,18 @@ static int idxd_setup_interrupts(struct idxd_device *idxd) for (i = 0; i < msixcnt; i++) { idxd->irq_entries[i].id = i; idxd->irq_entries[i].idxd = idxd; + /* + * Association of WQ should be assigned starting with irq_entry 1. + * irq_entry 0 is for misc interrupts and has no wq association + */ + if (i > 0) + idxd->irq_entries[i].wq = idxd->wqs[i - 1]; idxd->irq_entries[i].vector = pci_irq_vector(pdev, i); + idxd->irq_entries[i].int_handle = INVALID_INT_HANDLE; + if (device_pasid_enabled(idxd) && i > 0) + idxd->irq_entries[i].pasid = idxd->pasid; + else + idxd->irq_entries[i].pasid = INVALID_IOASID; spin_lock_init(&idxd->irq_entries[i].list_lock); } @@ -135,22 +147,14 @@ static int idxd_setup_interrupts(struct idxd_device *idxd) } dev_dbg(dev, "Allocated idxd-msix %d for vector %d\n", i, irq_entry->vector); - if (idxd->hw.cmd_cap & BIT(IDXD_CMD_REQUEST_INT_HANDLE)) { - /* - * The MSIX vector enumeration starts at 1 with vector 0 being the - * misc interrupt that handles non I/O completion events. The - * interrupt handles are for IMS enumeration on guest. The misc - * interrupt vector does not require a handle and therefore we start - * the int_handles at index 0. Since 'i' starts at 1, the first - * int_handles index will be 0. - */ - rc = idxd_device_request_int_handle(idxd, i, &idxd->int_handles[i - 1], + if (idxd->request_int_handles) { + rc = idxd_device_request_int_handle(idxd, i, &irq_entry->int_handle, IDXD_IRQ_MSIX); if (rc < 0) { free_irq(irq_entry->vector, irq_entry); goto err_wq_irqs; } - dev_dbg(dev, "int handle requested: %u\n", idxd->int_handles[i - 1]); + dev_dbg(dev, "int handle requested: %u\n", irq_entry->int_handle); } } @@ -161,9 +165,15 @@ static int idxd_setup_interrupts(struct idxd_device *idxd) while (--i >= 0) { irq_entry = &idxd->irq_entries[i]; free_irq(irq_entry->vector, irq_entry); - if (i != 0) - idxd_device_release_int_handle(idxd, - idxd->int_handles[i], IDXD_IRQ_MSIX); + if (irq_entry->int_handle != INVALID_INT_HANDLE) { + idxd_device_release_int_handle(idxd, irq_entry->int_handle, + IDXD_IRQ_MSIX); + irq_entry->int_handle = INVALID_INT_HANDLE; + irq_entry->pasid = INVALID_IOASID; + } + irq_entry->vector = -1; + irq_entry->wq = NULL; + irq_entry->idxd = NULL; } err_misc_irq: /* Disable error interrupt generation */ @@ -179,21 +189,19 @@ static void idxd_cleanup_interrupts(struct idxd_device *idxd) { struct pci_dev *pdev = idxd->pdev; struct idxd_irq_entry *irq_entry; - int i, msixcnt; - - msixcnt = pci_msix_vec_count(pdev); - if (msixcnt <= 0) - return; - - irq_entry = &idxd->irq_entries[0]; - free_irq(irq_entry->vector, irq_entry); - - for (i = 1; i < msixcnt; i++) { + int i; + for (i = 0; i < idxd->irq_cnt; i++) { irq_entry = &idxd->irq_entries[i]; - if (idxd->hw.cmd_cap & BIT(IDXD_CMD_RELEASE_INT_HANDLE)) - idxd_device_release_int_handle(idxd, idxd->int_handles[i], + if (irq_entry->int_handle != INVALID_INT_HANDLE) { + idxd_device_release_int_handle(idxd, irq_entry->int_handle, IDXD_IRQ_MSIX); + irq_entry->int_handle = INVALID_INT_HANDLE; + irq_entry->pasid = INVALID_IOASID; + } + irq_entry->vector = -1; + irq_entry->wq = NULL; + irq_entry->idxd = NULL; free_irq(irq_entry->vector, irq_entry); } @@ -379,13 +387,6 @@ static int idxd_setup_internals(struct idxd_device *idxd) init_waitqueue_head(&idxd->cmd_waitq); - if (idxd->hw.cmd_cap & BIT(IDXD_CMD_REQUEST_INT_HANDLE)) { - idxd->int_handles = kcalloc_node(idxd->max_wqs, sizeof(int), GFP_KERNEL, - dev_to_node(dev)); - if (!idxd->int_handles) - return -ENOMEM; - } - rc = idxd_setup_wqs(idxd); if (rc < 0) goto err_wqs; @@ -416,7 +417,6 @@ static int idxd_setup_internals(struct idxd_device *idxd) for (i = 0; i < idxd->max_wqs; i++) put_device(wq_confdev(idxd->wqs[i])); err_wqs: - kfree(idxd->int_handles); return rc; } @@ -451,6 +451,10 @@ static void idxd_read_caps(struct idxd_device *idxd) dev_dbg(dev, "cmd_cap: %#x\n", idxd->hw.cmd_cap); } + /* reading command capabilities */ + if (idxd->hw.cmd_cap & BIT(IDXD_CMD_REQUEST_INT_HANDLE)) + idxd->request_int_handles = true; + idxd->max_xfer_bytes = 1ULL << idxd->hw.gen_cap.max_xfer_shift; dev_dbg(dev, "max xfer size: %llu bytes\n", idxd->max_xfer_bytes); idxd->max_batch_size = 1U << idxd->hw.gen_cap.max_batch_shift; @@ -748,15 +752,15 @@ static void idxd_release_int_handles(struct idxd_device *idxd) struct device *dev = &idxd->pdev->dev; int i, rc; - for (i = 0; i < idxd->num_wq_irqs; i++) { - if (idxd->hw.cmd_cap & BIT(IDXD_CMD_RELEASE_INT_HANDLE)) { - rc = idxd_device_release_int_handle(idxd, idxd->int_handles[i], - IDXD_IRQ_MSIX); + for (i = 1; i < idxd->irq_cnt; i++) { + struct idxd_irq_entry *ie = &idxd->irq_entries[i]; + + if (ie->int_handle != INVALID_INT_HANDLE) { + rc = idxd_device_release_int_handle(idxd, ie->int_handle, IDXD_IRQ_MSIX); if (rc < 0) - dev_warn(dev, "irq handle %d release failed\n", - idxd->int_handles[i]); + dev_warn(dev, "irq handle %d release failed\n", ie->int_handle); else - dev_dbg(dev, "int handle requested: %u\n", idxd->int_handles[i]); + dev_dbg(dev, "int handle released: %u\n", ie->int_handle); } } } diff --git a/drivers/dma/idxd/submit.c b/drivers/dma/idxd/submit.c index ea11809dbb32..d4688f369bc2 100644 --- a/drivers/dma/idxd/submit.c +++ b/drivers/dma/idxd/submit.c @@ -25,10 +25,10 @@ static struct idxd_desc *__get_desc(struct idxd_wq *wq, int idx, int cpu) * On host, MSIX vecotr 0 is used for misc interrupt. Therefore when we match * vector 1:1 to the WQ id, we need to add 1 */ - if (!idxd->int_handles) + if (wq->ie->int_handle == INVALID_INT_HANDLE) desc->hw->int_handle = wq->id + 1; else - desc->hw->int_handle = idxd->int_handles[wq->id]; + desc->hw->int_handle = wq->ie->int_handle; return desc; } @@ -159,7 +159,7 @@ int idxd_submit_desc(struct idxd_wq *wq, struct idxd_desc *desc) * that we designated the descriptor to. */ if (desc->hw->flags & IDXD_OP_FLAG_RCI) { - ie = &idxd->irq_entries[wq->id + 1]; + ie = wq->ie; llist_add(&desc->llnode, &ie->pending_llist); } diff --git a/drivers/dma/idxd/sysfs.c b/drivers/dma/idxd/sysfs.c index a9025be940db..90857e776273 100644 --- a/drivers/dma/idxd/sysfs.c +++ b/drivers/dma/idxd/sysfs.c @@ -1269,7 +1269,6 @@ static void idxd_conf_device_release(struct device *dev) kfree(idxd->wqs); kfree(idxd->engines); kfree(idxd->irq_entries); - kfree(idxd->int_handles); ida_free(&idxd_ida, idxd->id); kfree(idxd); } From eb0cf33a91b46cd50b590d032471f7f977d5a92a Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Tue, 26 Oct 2021 14:36:17 -0700 Subject: [PATCH 010/493] dmaengine: idxd: move interrupt handle assignment In preparation of supporting interrupt handle revoke event, move the interrupt handle assignment to right before the descriptor to be submitted. This allows the interrupt handle revoke logic to assign the latest interrupt handle on submission. Reviewed-by: Kevin Tian Signed-off-by: Dave Jiang Link: https://lore.kernel.org/r/163528417767.3925689.7730411152122952808.stgit@djiang5-desk3.ch.intel.com Signed-off-by: Vinod Koul --- drivers/dma/idxd/submit.c | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/drivers/dma/idxd/submit.c b/drivers/dma/idxd/submit.c index d4688f369bc2..df02c5c814e7 100644 --- a/drivers/dma/idxd/submit.c +++ b/drivers/dma/idxd/submit.c @@ -21,15 +21,6 @@ static struct idxd_desc *__get_desc(struct idxd_wq *wq, int idx, int cpu) if (device_pasid_enabled(idxd)) desc->hw->pasid = idxd->pasid; - /* - * On host, MSIX vecotr 0 is used for misc interrupt. Therefore when we match - * vector 1:1 to the WQ id, we need to add 1 - */ - if (wq->ie->int_handle == INVALID_INT_HANDLE) - desc->hw->int_handle = wq->id + 1; - else - desc->hw->int_handle = wq->ie->int_handle; - return desc; } @@ -160,6 +151,11 @@ int idxd_submit_desc(struct idxd_wq *wq, struct idxd_desc *desc) */ if (desc->hw->flags & IDXD_OP_FLAG_RCI) { ie = wq->ie; + if (ie->int_handle == INVALID_INT_HANDLE) + desc->hw->int_handle = ie->id; + else + desc->hw->int_handle = ie->int_handle; + llist_add(&desc->llnode, &ie->pending_llist); } From 46c6df1c958e55558212cfa94cad201eae48d684 Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Tue, 26 Oct 2021 14:36:23 -0700 Subject: [PATCH 011/493] dmaengine: idxd: add helper for per interrupt handle drain The helper is called at the completion of the interrupt handle refresh event. It issues drain descriptors to each of the wq with associated interrupt handle. The drain descriptor will have interrupt request set but without completion record. This will ensure all descriptors with incorrect interrupt completion handle get drained and a completion interrupt is triggered for the guest driver to process them. Reviewed-by: Kevin Tian Signed-off-by: Dave Jiang Link: https://lore.kernel.org/r/163528418315.3925689.7944718440052849626.stgit@djiang5-desk3.ch.intel.com Signed-off-by: Vinod Koul --- drivers/dma/idxd/irq.c | 39 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 39 insertions(+) diff --git a/drivers/dma/idxd/irq.c b/drivers/dma/idxd/irq.c index d9c4fc22536d..5434f702901a 100644 --- a/drivers/dma/idxd/irq.c +++ b/drivers/dma/idxd/irq.c @@ -55,6 +55,45 @@ static void idxd_device_reinit(struct work_struct *work) idxd_device_clear_state(idxd); } +/* + * The function sends a drain descriptor for the interrupt handle. The drain ensures + * all descriptors with this interrupt handle is flushed and the interrupt + * will allow the cleanup of the outstanding descriptors. + */ +static void idxd_int_handle_revoke_drain(struct idxd_irq_entry *ie) +{ + struct idxd_wq *wq = ie->wq; + struct idxd_device *idxd = ie->idxd; + struct device *dev = &idxd->pdev->dev; + struct dsa_hw_desc desc = {}; + void __iomem *portal; + int rc; + + /* Issue a simple drain operation with interrupt but no completion record */ + desc.flags = IDXD_OP_FLAG_RCI; + desc.opcode = DSA_OPCODE_DRAIN; + desc.priv = 1; + + if (ie->pasid != INVALID_IOASID) + desc.pasid = ie->pasid; + desc.int_handle = ie->int_handle; + portal = idxd_wq_portal_addr(wq); + + /* + * The wmb() makes sure that the descriptor is all there before we + * issue. + */ + wmb(); + if (wq_dedicated(wq)) { + iosubmit_cmds512(portal, &desc, 1); + } else { + rc = enqcmds(portal, &desc); + /* This should not fail unless hardware failed. */ + if (rc < 0) + dev_warn(dev, "Failed to submit drain desc on wq %d\n", wq->id); + } +} + static int process_misc_interrupts(struct idxd_device *idxd, u32 cause) { struct device *dev = &idxd->pdev->dev; From bd5970a0d01f8e45af9b2e2cf1d245b84ea757ba Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Tue, 26 Oct 2021 14:36:29 -0700 Subject: [PATCH 012/493] dmaengine: idxd: create locked version of idxd_quiesce() call Add a locked version of idxd_quiesce() call so that the quiesce can be called with a lock in situations where the lock is not held by the caller. In the driver probe/remove path, the lock is already held, so the raw version can be called w/o locking. Reviewed-by: Kevin Tian Signed-off-by: Dave Jiang Link: https://lore.kernel.org/r/163528418980.3925689.5841907054957931211.stgit@djiang5-desk3.ch.intel.com Signed-off-by: Vinod Koul --- drivers/dma/idxd/device.c | 10 +++++++++- drivers/dma/idxd/dma.c | 4 ++-- drivers/dma/idxd/idxd.h | 1 + 3 files changed, 12 insertions(+), 3 deletions(-) diff --git a/drivers/dma/idxd/device.c b/drivers/dma/idxd/device.c index f381319615fd..943e9967627b 100644 --- a/drivers/dma/idxd/device.c +++ b/drivers/dma/idxd/device.c @@ -411,12 +411,20 @@ int idxd_wq_init_percpu_ref(struct idxd_wq *wq) return 0; } -void idxd_wq_quiesce(struct idxd_wq *wq) +void __idxd_wq_quiesce(struct idxd_wq *wq) { + lockdep_assert_held(&wq->wq_lock); percpu_ref_kill(&wq->wq_active); wait_for_completion(&wq->wq_dead); } +void idxd_wq_quiesce(struct idxd_wq *wq) +{ + mutex_lock(&wq->wq_lock); + __idxd_wq_quiesce(wq); + mutex_unlock(&wq->wq_lock); +} + /* Device control bits */ static inline bool idxd_is_enabled(struct idxd_device *idxd) { diff --git a/drivers/dma/idxd/dma.c b/drivers/dma/idxd/dma.c index 1ea663215909..375dbae18583 100644 --- a/drivers/dma/idxd/dma.c +++ b/drivers/dma/idxd/dma.c @@ -316,7 +316,7 @@ static int idxd_dmaengine_drv_probe(struct idxd_dev *idxd_dev) return 0; err_dma: - idxd_wq_quiesce(wq); + __idxd_wq_quiesce(wq); percpu_ref_exit(&wq->wq_active); err_ref: idxd_wq_free_resources(wq); @@ -333,7 +333,7 @@ static void idxd_dmaengine_drv_remove(struct idxd_dev *idxd_dev) struct idxd_wq *wq = idxd_dev_to_wq(idxd_dev); mutex_lock(&wq->wq_lock); - idxd_wq_quiesce(wq); + __idxd_wq_quiesce(wq); idxd_unregister_dma_channel(wq); idxd_wq_free_resources(wq); __drv_disable_wq(wq); diff --git a/drivers/dma/idxd/idxd.h b/drivers/dma/idxd/idxd.h index 355159d4ee68..970701738c8a 100644 --- a/drivers/dma/idxd/idxd.h +++ b/drivers/dma/idxd/idxd.h @@ -570,6 +570,7 @@ int idxd_wq_map_portal(struct idxd_wq *wq); void idxd_wq_unmap_portal(struct idxd_wq *wq); int idxd_wq_set_pasid(struct idxd_wq *wq, int pasid); int idxd_wq_disable_pasid(struct idxd_wq *wq); +void __idxd_wq_quiesce(struct idxd_wq *wq); void idxd_wq_quiesce(struct idxd_wq *wq); int idxd_wq_init_percpu_ref(struct idxd_wq *wq); From f6d442f7088cbf5e2ac4561aca6888380239d5b9 Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Tue, 26 Oct 2021 14:36:36 -0700 Subject: [PATCH 013/493] dmaengine: idxd: handle invalid interrupt handle descriptors Handle a descriptor that has been marked with invalid interrupt handle error in status. Create a work item that will resubmit the descriptor. This typically happens when the driver has handled the revoke interrupt handle event and has a new interrupt handle. Reviewed-by: Kevin Tian Signed-off-by: Dave Jiang Link: https://lore.kernel.org/r/163528419601.3925689.4166517602890523193.stgit@djiang5-desk3.ch.intel.com Signed-off-by: Vinod Koul --- drivers/dma/idxd/dma.c | 14 ++++++++---- drivers/dma/idxd/idxd.h | 1 + drivers/dma/idxd/irq.c | 50 +++++++++++++++++++++++++++++++++++++++++ 3 files changed, 61 insertions(+), 4 deletions(-) diff --git a/drivers/dma/idxd/dma.c b/drivers/dma/idxd/dma.c index 375dbae18583..2ce873994e33 100644 --- a/drivers/dma/idxd/dma.c +++ b/drivers/dma/idxd/dma.c @@ -24,18 +24,24 @@ void idxd_dma_complete_txd(struct idxd_desc *desc, enum idxd_complete_type comp_type, bool free_desc) { + struct idxd_device *idxd = desc->wq->idxd; struct dma_async_tx_descriptor *tx; struct dmaengine_result res; int complete = 1; - if (desc->completion->status == DSA_COMP_SUCCESS) + if (desc->completion->status == DSA_COMP_SUCCESS) { res.result = DMA_TRANS_NOERROR; - else if (desc->completion->status) + } else if (desc->completion->status) { + if (idxd->request_int_handles && comp_type != IDXD_COMPLETE_ABORT && + desc->completion->status == DSA_COMP_INT_HANDLE_INVAL && + idxd_queue_int_handle_resubmit(desc)) + return; res.result = DMA_TRANS_WRITE_FAILED; - else if (comp_type == IDXD_COMPLETE_ABORT) + } else if (comp_type == IDXD_COMPLETE_ABORT) { res.result = DMA_TRANS_ABORTED; - else + } else { complete = 0; + } tx = &desc->txd; if (complete && tx->cookie) { diff --git a/drivers/dma/idxd/idxd.h b/drivers/dma/idxd/idxd.h index 970701738c8a..82c4915f58a2 100644 --- a/drivers/dma/idxd/idxd.h +++ b/drivers/dma/idxd/idxd.h @@ -524,6 +524,7 @@ void idxd_unregister_devices(struct idxd_device *idxd); int idxd_register_driver(void); void idxd_unregister_driver(void); void idxd_wqs_quiesce(struct idxd_device *idxd); +bool idxd_queue_int_handle_resubmit(struct idxd_desc *desc); /* device interrupt control */ void idxd_msix_perm_setup(struct idxd_device *idxd); diff --git a/drivers/dma/idxd/irq.c b/drivers/dma/idxd/irq.c index 5434f702901a..eaaec7a2c740 100644 --- a/drivers/dma/idxd/irq.c +++ b/drivers/dma/idxd/irq.c @@ -22,6 +22,11 @@ struct idxd_fault { struct idxd_device *idxd; }; +struct idxd_resubmit { + struct work_struct work; + struct idxd_desc *desc; +}; + static void idxd_device_reinit(struct work_struct *work) { struct idxd_device *idxd = container_of(work, struct idxd_device, work); @@ -216,6 +221,51 @@ irqreturn_t idxd_misc_thread(int vec, void *data) return IRQ_HANDLED; } +static void idxd_int_handle_resubmit_work(struct work_struct *work) +{ + struct idxd_resubmit *irw = container_of(work, struct idxd_resubmit, work); + struct idxd_desc *desc = irw->desc; + struct idxd_wq *wq = desc->wq; + int rc; + + desc->completion->status = 0; + rc = idxd_submit_desc(wq, desc); + if (rc < 0) { + dev_dbg(&wq->idxd->pdev->dev, "Failed to resubmit desc %d to wq %d.\n", + desc->id, wq->id); + /* + * If the error is not -EAGAIN, it means the submission failed due to wq + * has been killed instead of ENQCMDS failure. Here the driver needs to + * notify the submitter of the failure by reporting abort status. + * + * -EAGAIN comes from ENQCMDS failure. idxd_submit_desc() will handle the + * abort. + */ + if (rc != -EAGAIN) { + desc->completion->status = IDXD_COMP_DESC_ABORT; + idxd_dma_complete_txd(desc, IDXD_COMPLETE_ABORT, false); + } + idxd_free_desc(wq, desc); + } + kfree(irw); +} + +bool idxd_queue_int_handle_resubmit(struct idxd_desc *desc) +{ + struct idxd_wq *wq = desc->wq; + struct idxd_device *idxd = wq->idxd; + struct idxd_resubmit *irw; + + irw = kzalloc(sizeof(*irw), GFP_KERNEL); + if (!irw) + return false; + + irw->desc = desc; + INIT_WORK(&irw->work, idxd_int_handle_resubmit_work); + queue_work(idxd->wq, &irw->work); + return true; +} + static void irq_process_pending_llist(struct idxd_irq_entry *irq_entry) { struct idxd_desc *desc, *t; From 56fc39f5a36794c4f27f5fee047b641eac3f5b89 Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Tue, 26 Oct 2021 14:36:41 -0700 Subject: [PATCH 014/493] dmaengine: idxd: handle interrupt handle revoked event "Interrupt handle revoked" is an event that happens when the driver is running on a guest kernel and the VM is migrated to a new machine. The device will trigger an interrupt that signals to the guest driver that the interrupt handles need to be replaced. The misc irq thread function calls a helper function to handle the event. The function uses the WQ percpu_ref to quiesce the kernel submissions. It then replaces the interrupt handles by requesting interrupt handle command for each I/O MSIX vector. Once the handle is updated, the driver will unblock the submission path to allow new submissions. The submitter will attempt to acquire a percpu_ref before submission. When the request fails, it will wait on the wq_resurrect 'completion'. The driver does anticipate the possibility of descriptors being submitted before the WQ percpu_ref is killed. If a descriptor has already been submitted, it will return with incorrect interrupt handle status. The descriptor will be re-submitted with the new interrupt handle on the completion path. For descriptors with incorrect interrupt handles, completion interrupt won't be triggered. At the completion of the interrupt handle refresh, the handling function will call idxd_int_handle_refresh_drain() to issue drain descriptors to each of the wq with associated interrupt handle. The drain descriptor will have interrupt request set but without completion record. This will ensure all descriptors with incorrect interrupt completion handle get drained and a completion interrupt is triggered for the guest driver to process them. Reviewed-by: Kevin Tian Co-Developed-by: Sanjay Kumar Signed-off-by: Dave Jiang Link: https://lore.kernel.org/r/163528420189.3925689.18212568593220415551.stgit@djiang5-desk3.ch.intel.com Signed-off-by: Vinod Koul --- drivers/dma/idxd/device.c | 6 +- drivers/dma/idxd/idxd.h | 1 + drivers/dma/idxd/init.c | 1 + drivers/dma/idxd/irq.c | 137 +++++++++++++++++++++++++++++++++++ drivers/dma/idxd/registers.h | 1 + drivers/dma/idxd/submit.c | 10 ++- 6 files changed, 152 insertions(+), 4 deletions(-) diff --git a/drivers/dma/idxd/device.c b/drivers/dma/idxd/device.c index 943e9967627b..1dc5245107df 100644 --- a/drivers/dma/idxd/device.c +++ b/drivers/dma/idxd/device.c @@ -404,17 +404,21 @@ int idxd_wq_init_percpu_ref(struct idxd_wq *wq) int rc; memset(&wq->wq_active, 0, sizeof(wq->wq_active)); - rc = percpu_ref_init(&wq->wq_active, idxd_wq_ref_release, 0, GFP_KERNEL); + rc = percpu_ref_init(&wq->wq_active, idxd_wq_ref_release, + PERCPU_REF_ALLOW_REINIT, GFP_KERNEL); if (rc < 0) return rc; reinit_completion(&wq->wq_dead); + reinit_completion(&wq->wq_resurrect); return 0; } void __idxd_wq_quiesce(struct idxd_wq *wq) { lockdep_assert_held(&wq->wq_lock); + reinit_completion(&wq->wq_resurrect); percpu_ref_kill(&wq->wq_active); + complete_all(&wq->wq_resurrect); wait_for_completion(&wq->wq_dead); } diff --git a/drivers/dma/idxd/idxd.h b/drivers/dma/idxd/idxd.h index 82c4915f58a2..51e79201636c 100644 --- a/drivers/dma/idxd/idxd.h +++ b/drivers/dma/idxd/idxd.h @@ -171,6 +171,7 @@ struct idxd_wq { u32 portal_offset; struct percpu_ref wq_active; struct completion wq_dead; + struct completion wq_resurrect; struct idxd_dev idxd_dev; struct idxd_cdev *idxd_cdev; struct wait_queue_head err_queue; diff --git a/drivers/dma/idxd/init.c b/drivers/dma/idxd/init.c index 2dbff722e207..912839bf0be3 100644 --- a/drivers/dma/idxd/init.c +++ b/drivers/dma/idxd/init.c @@ -245,6 +245,7 @@ static int idxd_setup_wqs(struct idxd_device *idxd) mutex_init(&wq->wq_lock); init_waitqueue_head(&wq->err_queue); init_completion(&wq->wq_dead); + init_completion(&wq->wq_resurrect); wq->max_xfer_bytes = idxd->max_xfer_bytes; wq->max_batch_size = idxd->max_batch_size; wq->wqcfg = kzalloc_node(idxd->wqcfg_size, GFP_KERNEL, dev_to_node(dev)); diff --git a/drivers/dma/idxd/irq.c b/drivers/dma/idxd/irq.c index eaaec7a2c740..a3bf3ea84587 100644 --- a/drivers/dma/idxd/irq.c +++ b/drivers/dma/idxd/irq.c @@ -6,6 +6,7 @@ #include #include #include +#include #include #include "../dmaengine.h" #include "idxd.h" @@ -27,6 +28,11 @@ struct idxd_resubmit { struct idxd_desc *desc; }; +struct idxd_int_handle_revoke { + struct work_struct work; + struct idxd_device *idxd; +}; + static void idxd_device_reinit(struct work_struct *work) { struct idxd_device *idxd = container_of(work, struct idxd_device, work); @@ -99,6 +105,120 @@ static void idxd_int_handle_revoke_drain(struct idxd_irq_entry *ie) } } +static void idxd_abort_invalid_int_handle_descs(struct idxd_irq_entry *ie) +{ + LIST_HEAD(flist); + struct idxd_desc *d, *t; + struct llist_node *head; + + spin_lock(&ie->list_lock); + head = llist_del_all(&ie->pending_llist); + if (head) { + llist_for_each_entry_safe(d, t, head, llnode) + list_add_tail(&d->list, &ie->work_list); + } + + list_for_each_entry_safe(d, t, &ie->work_list, list) { + if (d->completion->status == DSA_COMP_INT_HANDLE_INVAL) + list_move_tail(&d->list, &flist); + } + spin_unlock(&ie->list_lock); + + list_for_each_entry_safe(d, t, &flist, list) { + list_del(&d->list); + idxd_dma_complete_txd(d, IDXD_COMPLETE_ABORT, true); + } +} + +static void idxd_int_handle_revoke(struct work_struct *work) +{ + struct idxd_int_handle_revoke *revoke = + container_of(work, struct idxd_int_handle_revoke, work); + struct idxd_device *idxd = revoke->idxd; + struct pci_dev *pdev = idxd->pdev; + struct device *dev = &pdev->dev; + int i, new_handle, rc; + + if (!idxd->request_int_handles) { + kfree(revoke); + dev_warn(dev, "Unexpected int handle refresh interrupt.\n"); + return; + } + + /* + * The loop attempts to acquire new interrupt handle for all interrupt + * vectors that supports a handle. If a new interrupt handle is acquired and the + * wq is kernel type, the driver will kill the percpu_ref to pause all + * ongoing descriptor submissions. The interrupt handle is then changed. + * After change, the percpu_ref is revived and all the pending submissions + * are woken to try again. A drain is sent to for the interrupt handle + * at the end to make sure all invalid int handle descriptors are processed. + */ + for (i = 1; i < idxd->irq_cnt; i++) { + struct idxd_irq_entry *ie = &idxd->irq_entries[i]; + struct idxd_wq *wq = ie->wq; + + rc = idxd_device_request_int_handle(idxd, i, &new_handle, IDXD_IRQ_MSIX); + if (rc < 0) { + dev_warn(dev, "get int handle %d failed: %d\n", i, rc); + /* + * Failed to acquire new interrupt handle. Kill the WQ + * and release all the pending submitters. The submitters will + * get error return code and handle appropriately. + */ + ie->int_handle = INVALID_INT_HANDLE; + idxd_wq_quiesce(wq); + idxd_abort_invalid_int_handle_descs(ie); + continue; + } + + /* No change in interrupt handle, nothing needs to be done */ + if (ie->int_handle == new_handle) + continue; + + if (wq->state != IDXD_WQ_ENABLED || wq->type != IDXD_WQT_KERNEL) { + /* + * All the MSIX interrupts are allocated at once during probe. + * Therefore we need to update all interrupts even if the WQ + * isn't supporting interrupt operations. + */ + ie->int_handle = new_handle; + continue; + } + + mutex_lock(&wq->wq_lock); + reinit_completion(&wq->wq_resurrect); + + /* Kill percpu_ref to pause additional descriptor submissions */ + percpu_ref_kill(&wq->wq_active); + + /* Wait for all submitters quiesce before we change interrupt handle */ + wait_for_completion(&wq->wq_dead); + + ie->int_handle = new_handle; + + /* Revive percpu ref and wake up all the waiting submitters */ + percpu_ref_reinit(&wq->wq_active); + complete_all(&wq->wq_resurrect); + mutex_unlock(&wq->wq_lock); + + /* + * The delay here is to wait for all possible MOVDIR64B that + * are issued before percpu_ref_kill() has happened to have + * reached the PCIe domain before the drain is issued. The driver + * needs to ensure that the drain descriptor issued does not pass + * all the other issued descriptors that contain the invalid + * interrupt handle in order to ensure that the drain descriptor + * interrupt will allow the cleanup of all the descriptors with + * invalid interrupt handle. + */ + if (wq_dedicated(wq)) + udelay(100); + idxd_int_handle_revoke_drain(ie); + } + kfree(revoke); +} + static int process_misc_interrupts(struct idxd_device *idxd, u32 cause) { struct device *dev = &idxd->pdev->dev; @@ -145,6 +265,23 @@ static int process_misc_interrupts(struct idxd_device *idxd, u32 cause) err = true; } + if (cause & IDXD_INTC_INT_HANDLE_REVOKED) { + struct idxd_int_handle_revoke *revoke; + + val |= IDXD_INTC_INT_HANDLE_REVOKED; + + revoke = kzalloc(sizeof(*revoke), GFP_ATOMIC); + if (revoke) { + revoke->idxd = idxd; + INIT_WORK(&revoke->work, idxd_int_handle_revoke); + queue_work(idxd->wq, &revoke->work); + + } else { + dev_err(dev, "Failed to allocate work for int handle revoke\n"); + idxd_wqs_quiesce(idxd); + } + } + if (cause & IDXD_INTC_CMD) { val |= IDXD_INTC_CMD; complete(idxd->cmd_done); diff --git a/drivers/dma/idxd/registers.h b/drivers/dma/idxd/registers.h index 262c8220adbd..8e396698c22b 100644 --- a/drivers/dma/idxd/registers.h +++ b/drivers/dma/idxd/registers.h @@ -158,6 +158,7 @@ enum idxd_device_reset_type { #define IDXD_INTC_OCCUPY 0x04 #define IDXD_INTC_PERFMON_OVFL 0x08 #define IDXD_INTC_HALT_STATE 0x10 +#define IDXD_INTC_INT_HANDLE_REVOKED 0x80000000 #define IDXD_CMD_OFFSET 0xa0 union idxd_command_reg { diff --git a/drivers/dma/idxd/submit.c b/drivers/dma/idxd/submit.c index df02c5c814e7..776fa81db61d 100644 --- a/drivers/dma/idxd/submit.c +++ b/drivers/dma/idxd/submit.c @@ -127,14 +127,18 @@ int idxd_submit_desc(struct idxd_wq *wq, struct idxd_desc *desc) { struct idxd_device *idxd = wq->idxd; struct idxd_irq_entry *ie = NULL; + u32 desc_flags = desc->hw->flags; void __iomem *portal; int rc; if (idxd->state != IDXD_DEV_ENABLED) return -EIO; - if (!percpu_ref_tryget_live(&wq->wq_active)) - return -ENXIO; + if (!percpu_ref_tryget_live(&wq->wq_active)) { + wait_for_completion(&wq->wq_resurrect); + if (!percpu_ref_tryget_live(&wq->wq_active)) + return -ENXIO; + } portal = idxd_wq_portal_addr(wq); @@ -149,7 +153,7 @@ int idxd_submit_desc(struct idxd_wq *wq, struct idxd_desc *desc) * Pending the descriptor to the lockless list for the irq_entry * that we designated the descriptor to. */ - if (desc->hw->flags & IDXD_OP_FLAG_RCI) { + if (desc_flags & IDXD_OP_FLAG_RCI) { ie = wq->ie; if (ie->int_handle == INVALID_INT_HANDLE) desc->hw->int_handle = ie->id; From 58fe107660483138a7a77acd673b911016e4ad31 Mon Sep 17 00:00:00 2001 From: Adrian Larumbe Date: Mon, 1 Nov 2021 18:08:23 +0000 Subject: [PATCH 015/493] dmaengine: Add documentation for new memcpy scatter-gather function Documentation describes semantics, limitations and a typical use case scenario. Signed-off-by: Adrian Larumbe Link: https://lore.kernel.org/r/20211101180825.241048-2-adrianml@alumnos.upm.es Signed-off-by: Vinod Koul --- .../driver-api/dmaengine/provider.rst | 23 +++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/Documentation/driver-api/dmaengine/provider.rst b/Documentation/driver-api/dmaengine/provider.rst index ddb0a81a796c..0072c9c7efd3 100644 --- a/Documentation/driver-api/dmaengine/provider.rst +++ b/Documentation/driver-api/dmaengine/provider.rst @@ -162,6 +162,29 @@ Currently, the types available are: - The device is able to do memory to memory copies +- - DMA_MEMCPY_SG + + - The device supports memory to memory scatter-gather transfers. + + - Even though a plain memcpy can look like a particular case of a + scatter-gather transfer, with a single chunk to copy, it's a distinct + transaction type in the mem2mem transfer case. This is because some very + simple devices might be able to do contiguous single-chunk memory copies, + but have no support for more complex SG transfers. + + - No matter what the overall size of the combined chunks for source and + destination is, only as many bytes as the smallest of the two will be + transmitted. That means the number and size of the scatter-gather buffers in + both lists need not be the same, and that the operation functionally is + equivalent to a ``strncpy`` where the ``count`` argument equals the smallest + total size of the two scatter-gather list buffers. + + - It's usually used for copying pixel data between host memory and + memory-mapped GPU device memory, such as found on modern PCI video graphics + cards. The most immediate example is the OpenGL API function + ``glReadPielx()``, which might require a verbatim copy of a huge framebuffer + from local device memory onto host memory. + - DMA_XOR - The device is able to perform XOR operations on memory areas From 3218910fd5858842a1dd98ce92b602f0878f8210 Mon Sep 17 00:00:00 2001 From: Adrian Larumbe Date: Mon, 1 Nov 2021 18:08:24 +0000 Subject: [PATCH 016/493] dmaengine: Add core function and capability check for DMA_MEMCPY_SG This is the old DMA_SG interface that was removed in commit c678fa66341c ("dmaengine: remove DMA_SG as it is dead code in kernel"). It has been renamed to DMA_MEMCPY_SG to better match the MEMSET and MEMSET_SG naming convention. It should only be used for mem2mem copies, either main system memory or CPU-addressable device memory (like video memory on a PCI graphics card). Bringing back this interface was prompted by the need to use the Xilinx CDMA device for mem2mem SG transfers. Signed-off-by: Adrian Larumbe Link: https://lore.kernel.org/r/20211101180825.241048-3-adrianml@alumnos.upm.es Signed-off-by: Vinod Koul --- drivers/dma/dmaengine.c | 7 +++++++ include/linux/dmaengine.h | 20 ++++++++++++++++++++ 2 files changed, 27 insertions(+) diff --git a/drivers/dma/dmaengine.c b/drivers/dma/dmaengine.c index d9f7c097cfd6..2cfa8458b51b 100644 --- a/drivers/dma/dmaengine.c +++ b/drivers/dma/dmaengine.c @@ -1159,6 +1159,13 @@ int dma_async_device_register(struct dma_device *device) return -EIO; } + if (dma_has_cap(DMA_MEMCPY_SG, device->cap_mask) && !device->device_prep_dma_memcpy_sg) { + dev_err(device->dev, + "Device claims capability %s, but op is not defined\n", + "DMA_MEMCPY_SG"); + return -EIO; + } + if (dma_has_cap(DMA_XOR, device->cap_mask) && !device->device_prep_dma_xor) { dev_err(device->dev, "Device claims capability %s, but op is not defined\n", diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h index 9000f3ffce8b..554a86665de9 100644 --- a/include/linux/dmaengine.h +++ b/include/linux/dmaengine.h @@ -50,6 +50,7 @@ enum dma_status { */ enum dma_transaction_type { DMA_MEMCPY, + DMA_MEMCPY_SG, DMA_XOR, DMA_PQ, DMA_XOR_VAL, @@ -891,6 +892,11 @@ struct dma_device { struct dma_async_tx_descriptor *(*device_prep_dma_memcpy)( struct dma_chan *chan, dma_addr_t dst, dma_addr_t src, size_t len, unsigned long flags); + struct dma_async_tx_descriptor *(*device_prep_dma_memcpy_sg)( + struct dma_chan *chan, + struct scatterlist *dst_sg, unsigned int dst_nents, + struct scatterlist *src_sg, unsigned int src_nents, + unsigned long flags); struct dma_async_tx_descriptor *(*device_prep_dma_xor)( struct dma_chan *chan, dma_addr_t dst, dma_addr_t *src, unsigned int src_cnt, size_t len, unsigned long flags); @@ -1051,6 +1057,20 @@ static inline struct dma_async_tx_descriptor *dmaengine_prep_dma_memcpy( len, flags); } +static inline struct dma_async_tx_descriptor *dmaengine_prep_dma_memcpy_sg( + struct dma_chan *chan, + struct scatterlist *dst_sg, unsigned int dst_nents, + struct scatterlist *src_sg, unsigned int src_nents, + unsigned long flags) +{ + if (!chan || !chan->device || !chan->device->device_prep_dma_memcpy_sg) + return NULL; + + return chan->device->device_prep_dma_memcpy_sg(chan, dst_sg, dst_nents, + src_sg, src_nents, + flags); +} + static inline bool dmaengine_is_metadata_mode_supported(struct dma_chan *chan, enum dma_desc_metadata_mode mode) { From 29cf37fa6dd9c4de81b745ad3ae77f3709931d1e Mon Sep 17 00:00:00 2001 From: Adrian Larumbe Date: Mon, 1 Nov 2021 18:08:25 +0000 Subject: [PATCH 017/493] dmaengine: Add consumer for the new DMA_MEMCPY_SG API function. This new CDMA binding for device_prep_dma_memcpy_sg was partially borrowed from xlnx kernel tree, an expanded with extended address space support when linking descriptor segments and checking for incorrect zero transfer size. Signed-off-by: Adrian Larumbe Link: https://lore.kernel.org/r/20211101180825.241048-4-adrianml@alumnos.upm.es Signed-off-by: Vinod Koul --- drivers/dma/xilinx/xilinx_dma.c | 122 ++++++++++++++++++++++++++++++++ 1 file changed, 122 insertions(+) diff --git a/drivers/dma/xilinx/xilinx_dma.c b/drivers/dma/xilinx/xilinx_dma.c index 4677ce08ed40..61618148f9d4 100644 --- a/drivers/dma/xilinx/xilinx_dma.c +++ b/drivers/dma/xilinx/xilinx_dma.c @@ -2127,6 +2127,126 @@ error: return NULL; } +/** + * xilinx_cdma_prep_memcpy_sg - prepare descriptors for a memcpy_sg transaction + * @dchan: DMA channel + * @dst_sg: Destination scatter list + * @dst_sg_len: Number of entries in destination scatter list + * @src_sg: Source scatter list + * @src_sg_len: Number of entries in source scatter list + * @flags: transfer ack flags + * + * Return: Async transaction descriptor on success and NULL on failure + */ +static struct dma_async_tx_descriptor *xilinx_cdma_prep_memcpy_sg( + struct dma_chan *dchan, struct scatterlist *dst_sg, + unsigned int dst_sg_len, struct scatterlist *src_sg, + unsigned int src_sg_len, unsigned long flags) +{ + struct xilinx_dma_chan *chan = to_xilinx_chan(dchan); + struct xilinx_dma_tx_descriptor *desc; + struct xilinx_cdma_tx_segment *segment, *prev = NULL; + struct xilinx_cdma_desc_hw *hw; + size_t len, dst_avail, src_avail; + dma_addr_t dma_dst, dma_src; + + if (unlikely(dst_sg_len == 0 || src_sg_len == 0)) + return NULL; + + if (unlikely(!dst_sg || !src_sg)) + return NULL; + + desc = xilinx_dma_alloc_tx_descriptor(chan); + if (!desc) + return NULL; + + dma_async_tx_descriptor_init(&desc->async_tx, &chan->common); + desc->async_tx.tx_submit = xilinx_dma_tx_submit; + + dst_avail = sg_dma_len(dst_sg); + src_avail = sg_dma_len(src_sg); + /* + * loop until there is either no more source or no more destination + * scatterlist entry + */ + while (true) { + len = min_t(size_t, src_avail, dst_avail); + len = min_t(size_t, len, chan->xdev->max_buffer_len); + if (len == 0) + goto fetch; + + /* Allocate the link descriptor from DMA pool */ + segment = xilinx_cdma_alloc_tx_segment(chan); + if (!segment) + goto error; + + dma_dst = sg_dma_address(dst_sg) + sg_dma_len(dst_sg) - + dst_avail; + dma_src = sg_dma_address(src_sg) + sg_dma_len(src_sg) - + src_avail; + hw = &segment->hw; + hw->control = len; + hw->src_addr = dma_src; + hw->dest_addr = dma_dst; + if (chan->ext_addr) { + hw->src_addr_msb = upper_32_bits(dma_src); + hw->dest_addr_msb = upper_32_bits(dma_dst); + } + + if (prev) { + prev->hw.next_desc = segment->phys; + if (chan->ext_addr) + prev->hw.next_desc_msb = + upper_32_bits(segment->phys); + } + + prev = segment; + dst_avail -= len; + src_avail -= len; + list_add_tail(&segment->node, &desc->segments); + +fetch: + /* Fetch the next dst scatterlist entry */ + if (dst_avail == 0) { + if (dst_sg_len == 0) + break; + dst_sg = sg_next(dst_sg); + if (dst_sg == NULL) + break; + dst_sg_len--; + dst_avail = sg_dma_len(dst_sg); + } + /* Fetch the next src scatterlist entry */ + if (src_avail == 0) { + if (src_sg_len == 0) + break; + src_sg = sg_next(src_sg); + if (src_sg == NULL) + break; + src_sg_len--; + src_avail = sg_dma_len(src_sg); + } + } + + if (list_empty(&desc->segments)) { + dev_err(chan->xdev->dev, + "%s: Zero-size SG transfer requested\n", __func__); + goto error; + } + + /* Link the last hardware descriptor with the first. */ + segment = list_first_entry(&desc->segments, + struct xilinx_cdma_tx_segment, node); + desc->async_tx.phys = segment->phys; + prev->hw.next_desc = segment->phys; + + return &desc->async_tx; + +error: + xilinx_dma_free_tx_descriptor(chan, desc); + return NULL; +} + /** * xilinx_dma_prep_slave_sg - prepare descriptors for a DMA_SLAVE transaction * @dchan: DMA channel @@ -3115,7 +3235,9 @@ static int xilinx_dma_probe(struct platform_device *pdev) DMA_RESIDUE_GRANULARITY_SEGMENT; } else if (xdev->dma_config->dmatype == XDMA_TYPE_CDMA) { dma_cap_set(DMA_MEMCPY, xdev->common.cap_mask); + dma_cap_set(DMA_MEMCPY_SG, xdev->common.cap_mask); xdev->common.device_prep_dma_memcpy = xilinx_cdma_prep_memcpy; + xdev->common.device_prep_dma_memcpy_sg = xilinx_cdma_prep_memcpy_sg; /* Residue calculation is supported by only AXI DMA and CDMA */ xdev->common.residue_granularity = DMA_RESIDUE_GRANULARITY_SEGMENT; From 2bfab6f8b4f1814eabfcbf335c2fc451592e9fc5 Mon Sep 17 00:00:00 2001 From: Xu Wang Date: Tue, 16 Nov 2021 01:33:06 +0000 Subject: [PATCH 018/493] dmaengine: qcom: gpi: Remove unnecessary print function dev_err() The print function dev_err() is redundant because platform_get_irq() already prints an error. Signed-off-by: Xu Wang Link: https://lore.kernel.org/r/20211116013306.784-1-vulab@iscas.ac.cn Signed-off-by: Vinod Koul --- drivers/dma/qcom/gpi.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/dma/qcom/gpi.c b/drivers/dma/qcom/gpi.c index 1a1b7d8458c9..94f3648f7483 100644 --- a/drivers/dma/qcom/gpi.c +++ b/drivers/dma/qcom/gpi.c @@ -2206,10 +2206,8 @@ static int gpi_probe(struct platform_device *pdev) /* set up irq */ ret = platform_get_irq(pdev, i); - if (ret < 0) { - dev_err(gpi_dev->dev, "platform_get_irq failed for %d:%d\n", i, ret); + if (ret < 0) return ret; - } gpii->irq = ret; /* set up channel specific register info */ From 37829227f04247125894a7857e3299e8764c2d6f Mon Sep 17 00:00:00 2001 From: Daniel Thompson Date: Thu, 18 Nov 2021 10:09:51 +0000 Subject: [PATCH 019/493] Documentation: dmaengine: Add a description of what dmatest does Currently it can difficult to determine what dmatest does without reading the source code. Let's add a description. The description is taken mostly from the patch header of commit 4a776f0aa922 ("dmatest: Simple DMA memcpy test client"). It has been edited and updated slightly. Nevertheless the new text was largely written by Haarvard Skinnemoen and was copied from another patch, already committed to the kernel, which has Haarvard's SoB: attached to it. Signed-off-by: Daniel Thompson Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/20211118100952.27268-2-daniel.thompson@linaro.org Signed-off-by: Vinod Koul --- Documentation/driver-api/dmaengine/dmatest.rst | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/Documentation/driver-api/dmaengine/dmatest.rst b/Documentation/driver-api/dmaengine/dmatest.rst index ee268d445d38..529cc2cbbb1b 100644 --- a/Documentation/driver-api/dmaengine/dmatest.rst +++ b/Documentation/driver-api/dmaengine/dmatest.rst @@ -6,6 +6,16 @@ Andy Shevchenko This small document introduces how to test DMA drivers using dmatest module. +The dmatest module tests DMA memcpy, memset, XOR and RAID6 P+Q operations using +various lengths and various offsets into the source and destination buffers. It +will initialize both buffers with a repeatable pattern and verify that the DMA +engine copies the requested region and nothing more. It will also verify that +the bytes aren't swapped around, and that the source buffer isn't modified. + +The dmatest module can be configured to test a specific channel. It can also +test multiple channels at the same time, and it can start multiple threads +competing for the same channel. + .. note:: The test suite works only on the channels that have at least one capability of the following: DMA_MEMCPY (memory-to-memory), DMA_MEMSET From c61d7b2ef141abf81140756b45860a2306f395a2 Mon Sep 17 00:00:00 2001 From: Daniel Thompson Date: Thu, 18 Nov 2021 10:09:52 +0000 Subject: [PATCH 020/493] Documentation: dmaengine: Correctly describe dmatest with channel unset Currently the documentation states that channels must be configured before running the dmatest. This has not been true since commit 6b41030fdc79 ("dmaengine: dmatest: Restore default for channel"). Fix accordingly. Fixes: 6b41030fdc79 ("dmaengine: dmatest: Restore default for channel") Signed-off-by: Daniel Thompson Link: https://lore.kernel.org/r/20211118100952.27268-3-daniel.thompson@linaro.org Signed-off-by: Vinod Koul --- Documentation/driver-api/dmaengine/dmatest.rst | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/Documentation/driver-api/dmaengine/dmatest.rst b/Documentation/driver-api/dmaengine/dmatest.rst index 529cc2cbbb1b..cf9859cd0b43 100644 --- a/Documentation/driver-api/dmaengine/dmatest.rst +++ b/Documentation/driver-api/dmaengine/dmatest.rst @@ -153,13 +153,14 @@ Part 5 - Handling channel allocation Allocating Channels ------------------- -Channels are required to be configured prior to starting the test run. -Attempting to run the test without configuring the channels will fail. +Channels do not need to be configured prior to starting a test run. Attempting +to run the test without configuring the channels will result in testing any +channels that are available. Example:: % echo 1 > /sys/module/dmatest/parameters/run - dmatest: Could not start test, no channels configured + dmatest: No channels configured, continue with any Channels are registered using the "channel" parameter. Channels can be requested by their name, once requested, the channel is registered and a pending thread is added to the test list. From 7eafa6eed7f1c6d0d7ed2f90792ab6bce462a750 Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Sun, 14 Nov 2021 14:08:56 +0800 Subject: [PATCH 021/493] dmaengine: ppc4xx: remove unused variable `rval' The variable used for returning status in `ppc440spe_adma_dma2rxor_prep_src' function is never changed and this function just need to return 0. Thus, the `rval' can be removed and return 0 from `ppc440spe_adma_dma2rxor_prep_src'. Signed-off-by: Jason Wang Link: https://lore.kernel.org/r/20211114060856.239314-1-wangborong@cdjrlc.com Signed-off-by: Vinod Koul --- drivers/dma/ppc4xx/adma.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/dma/ppc4xx/adma.c b/drivers/dma/ppc4xx/adma.c index e2b5129c5f84..5e46e347e28b 100644 --- a/drivers/dma/ppc4xx/adma.c +++ b/drivers/dma/ppc4xx/adma.c @@ -3240,7 +3240,6 @@ static int ppc440spe_adma_dma2rxor_prep_src( struct ppc440spe_rxor *cursor, int index, int src_cnt, u32 addr) { - int rval = 0; u32 sign; struct ppc440spe_adma_desc_slot *desc = hdesc; int i; @@ -3348,7 +3347,7 @@ static int ppc440spe_adma_dma2rxor_prep_src( break; } - return rval; + return 0; } /** From b7b2b49e59e301e55d195f0f7088e8dc7031280f Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Sun, 28 Nov 2021 23:55:29 -0800 Subject: [PATCH 022/493] Input: palmas-pwrbutton - use bitfield helpers Use the FIELD_PREP() helper, instead of open-coding the same operation. Signed-off-by: Geert Uytterhoeven Link: https://lore.kernel.org/r/f8831b88346b36fc6e01e0910d0db6c94287d2b4.1637593297.git.geert+renesas@glider.be Signed-off-by: Dmitry Torokhov --- drivers/input/misc/palmas-pwrbutton.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/input/misc/palmas-pwrbutton.c b/drivers/input/misc/palmas-pwrbutton.c index f9b05cf09ff5..1e71336f5cf9 100644 --- a/drivers/input/misc/palmas-pwrbutton.c +++ b/drivers/input/misc/palmas-pwrbutton.c @@ -15,6 +15,7 @@ * GNU General Public License for more details. */ +#include #include #include #include @@ -192,8 +193,8 @@ static int palmas_pwron_probe(struct platform_device *pdev) * Setup default hardware shutdown option (long key press) * and debounce. */ - val = config.long_press_time_val << __ffs(PALMAS_LPK_TIME_MASK); - val |= config.pwron_debounce_val << __ffs(PALMAS_PWRON_DEBOUNCE_MASK); + val = FIELD_PREP(PALMAS_LPK_TIME_MASK, config.long_press_time_val) | + FIELD_PREP(PALMAS_PWRON_DEBOUNCE_MASK, config.pwron_debounce_val); error = palmas_update_bits(palmas, PALMAS_PMU_CONTROL_BASE, PALMAS_LONG_PRESS_KEY, PALMAS_LPK_TIME_MASK | From fafc66387dc069140e52739c0c86c8169d44c2dc Mon Sep 17 00:00:00 2001 From: Alistair Francis Date: Thu, 18 Nov 2021 11:27:12 -0800 Subject: [PATCH 023/493] Input: wacom_i2c - clean up the query device fields Improve the query device fields to be more verbose. Signed-off-by: Alistair Francis Link: https://lore.kernel.org/r/20211118123545.102872-1-alistair@alistair23.me Signed-off-by: Dmitry Torokhov --- drivers/input/touchscreen/wacom_i2c.c | 44 ++++++++++++++++----------- 1 file changed, 26 insertions(+), 18 deletions(-) diff --git a/drivers/input/touchscreen/wacom_i2c.c b/drivers/input/touchscreen/wacom_i2c.c index fe4ea6204a4e..141754b2764c 100644 --- a/drivers/input/touchscreen/wacom_i2c.c +++ b/drivers/input/touchscreen/wacom_i2c.c @@ -24,12 +24,19 @@ #define WACOM_IN_PROXIMITY BIT(5) /* Registers */ -#define WACOM_CMD_QUERY0 0x04 -#define WACOM_CMD_QUERY1 0x00 -#define WACOM_CMD_QUERY2 0x33 -#define WACOM_CMD_QUERY3 0x02 -#define WACOM_CMD_THROW0 0x05 -#define WACOM_CMD_THROW1 0x00 +#define WACOM_COMMAND_LSB 0x04 +#define WACOM_COMMAND_MSB 0x00 + +#define WACOM_DATA_LSB 0x05 +#define WACOM_DATA_MSB 0x00 + +/* Report types */ +#define REPORT_FEATURE 0x30 + +/* Requests / operations */ +#define OPCODE_GET_REPORT 0x02 + +#define WACOM_QUERY_REPORT 3 #define WACOM_QUERY_SIZE 19 struct wacom_features { @@ -50,23 +57,24 @@ struct wacom_i2c { static int wacom_query_device(struct i2c_client *client, struct wacom_features *features) { - int ret; - u8 cmd1[] = { WACOM_CMD_QUERY0, WACOM_CMD_QUERY1, - WACOM_CMD_QUERY2, WACOM_CMD_QUERY3 }; - u8 cmd2[] = { WACOM_CMD_THROW0, WACOM_CMD_THROW1 }; + u8 get_query_data_cmd[] = { + WACOM_COMMAND_LSB, + WACOM_COMMAND_MSB, + REPORT_FEATURE | WACOM_QUERY_REPORT, + OPCODE_GET_REPORT, + WACOM_DATA_LSB, + WACOM_DATA_MSB, + }; u8 data[WACOM_QUERY_SIZE]; + int ret; + struct i2c_msg msgs[] = { + /* Request reading of feature ReportID: 3 (Pen Query Data) */ { .addr = client->addr, .flags = 0, - .len = sizeof(cmd1), - .buf = cmd1, - }, - { - .addr = client->addr, - .flags = 0, - .len = sizeof(cmd2), - .buf = cmd2, + .len = sizeof(get_query_data_cmd), + .buf = get_query_data_cmd, }, { .addr = client->addr, From a9328d6de14e5b7c9256729d4d9f0924345d4eb9 Mon Sep 17 00:00:00 2001 From: Hari Nagalla Date: Mon, 22 Nov 2021 06:27:23 -0600 Subject: [PATCH 024/493] dt-bindings: remoteproc: k3-r5f: Update bindings for J721S2 SoCs The TI K3 J721S2 SoCs have three dual-core Arm R5F clusters/subsystems, with 2 R5F cores each, one in MCU voltage domain and the other two in MAIN voltage domain. These clusters are similar to J7200 R5F clusters. Compatible info is updated for intuitively matching to the new J721S2 SoCs. Signed-off-by: Hari Nagalla Acked-by: Rob Herring Link: https://lore.kernel.org/r/20211122122726.8532-2-hnagalla@ti.com Signed-off-by: Mathieu Poirier --- .../devicetree/bindings/remoteproc/ti,k3-r5f-rproc.yaml | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/Documentation/devicetree/bindings/remoteproc/ti,k3-r5f-rproc.yaml b/Documentation/devicetree/bindings/remoteproc/ti,k3-r5f-rproc.yaml index eeef255c4045..d9c7e8c2b268 100644 --- a/Documentation/devicetree/bindings/remoteproc/ti,k3-r5f-rproc.yaml +++ b/Documentation/devicetree/bindings/remoteproc/ti,k3-r5f-rproc.yaml @@ -38,6 +38,7 @@ properties: - ti,j721e-r5fss - ti,j7200-r5fss - ti,am64-r5fss + - ti,j721s2-r5fss power-domains: description: | @@ -64,9 +65,9 @@ properties: description: | Configuration Mode for the Dual R5F cores within the R5F cluster. Should be either a value of 1 (LockStep mode) or 0 (Split mode) on - most SoCs (AM65x, J721E, J7200), default is LockStep mode if omitted; - and should be either a value of 0 (Split mode) or 2 (Single-CPU mode) - on AM64x SoCs, default is Split mode if omitted. + most SoCs (AM65x, J721E, J7200, J721s2), default is LockStep mode if + omitted; and should be either a value of 0 (Split mode) or 2 + (Single-CPU mode) on AM64x SoCs, default is Split mode if omitted. # R5F Processor Child Nodes: # ========================== @@ -104,6 +105,7 @@ patternProperties: - ti,j721e-r5f - ti,j7200-r5f - ti,am64-r5f + - ti,j721s2-r5f reg: items: From 83b57e60b86372d3274ac9baf04219bb497da2d2 Mon Sep 17 00:00:00 2001 From: Hari Nagalla Date: Mon, 22 Nov 2021 06:27:24 -0600 Subject: [PATCH 025/493] dt-bindings: remoteproc: k3-dsp: Update bindings for J721S2 SoCs The TI K3 J721S2 SoCs have two TMS320C71x DSP subsystems, and does not have any TMS320C66x DSP subsystems. The C71x DSP subsystem in J721S2 SoCs is a similar to the C71x DSP on J721e with some minor core IP updates. Compatible info is updated for intuitvely matching to the new J721S2 SoCs. Signed-off-by: Hari Nagalla Acked-by: Rob Herring Link: https://lore.kernel.org/r/20211122122726.8532-3-hnagalla@ti.com Signed-off-by: Mathieu Poirier --- .../devicetree/bindings/remoteproc/ti,k3-dsp-rproc.yaml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/Documentation/devicetree/bindings/remoteproc/ti,k3-dsp-rproc.yaml b/Documentation/devicetree/bindings/remoteproc/ti,k3-dsp-rproc.yaml index 5ec6505ac408..7b56497eec4d 100644 --- a/Documentation/devicetree/bindings/remoteproc/ti,k3-dsp-rproc.yaml +++ b/Documentation/devicetree/bindings/remoteproc/ti,k3-dsp-rproc.yaml @@ -33,9 +33,11 @@ properties: enum: - ti,j721e-c66-dsp - ti,j721e-c71-dsp + - ti,j721s2-c71-dsp description: Use "ti,j721e-c66-dsp" for C66x DSPs on K3 J721E SoCs Use "ti,j721e-c71-dsp" for C71x DSPs on K3 J721E SoCs + Use "ti,j721s2-c71-dsp" for C71x DSPs on K3 J721S2 SoCs resets: description: | @@ -106,6 +108,7 @@ else: compatible: enum: - ti,j721e-c71-dsp + - ti,j721s2-c71-dsp then: properties: reg: From 3b918d8e9bd514d798a82568ef99b8d8ca90c8b8 Mon Sep 17 00:00:00 2001 From: Hari Nagalla Date: Mon, 22 Nov 2021 06:27:25 -0600 Subject: [PATCH 026/493] remoteproc: k3-dsp: Extend support for C71x DSPs on J721S2 SoCs The K3 J721S2 SoCs have two C71x DSP subsystems in MAIN voltage domain, and there are no C66x DSP subsystems on these SoCs. The C71x DSP subsystem is a slighly updated version of the C71x DSP subsystem on J721e. The C71x DSPs are 64 bit machine with fixed and floating point DSP operations. Extend support to the C71x DSPs with J721S2 compatible strings. Signed-off-by: Hari Nagalla Link: https://lore.kernel.org/r/20211122122726.8532-4-hnagalla@ti.com Signed-off-by: Mathieu Poirier --- drivers/remoteproc/ti_k3_dsp_remoteproc.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/remoteproc/ti_k3_dsp_remoteproc.c b/drivers/remoteproc/ti_k3_dsp_remoteproc.c index c352fa277c8d..939c5d90b562 100644 --- a/drivers/remoteproc/ti_k3_dsp_remoteproc.c +++ b/drivers/remoteproc/ti_k3_dsp_remoteproc.c @@ -767,6 +767,7 @@ static const struct k3_dsp_dev_data c71_data = { static const struct of_device_id k3_dsp_of_match[] = { { .compatible = "ti,j721e-c66-dsp", .data = &c66_data, }, { .compatible = "ti,j721e-c71-dsp", .data = &c71_data, }, + { .compatible = "ti,j721s2-c71-dsp", .data = &c71_data, }, { /* sentinel */ }, }; MODULE_DEVICE_TABLE(of, k3_dsp_of_match); From b20dc021ba5a813fec6dd66bb78a658e58cfce8d Mon Sep 17 00:00:00 2001 From: Hari Nagalla Date: Mon, 22 Nov 2021 06:27:26 -0600 Subject: [PATCH 027/493] remoteproc: k3-r5: Extend support for R5F clusters on J721S2 SoCs The K3 J721S2 SoCs have three dual-core R5F subsystems, one in MCU voltage domain and the other two in MAIN voltage domain. These R5F clusters are similar to the R5F clusters in J7200 SoCs. Compatible Info is updated to support J721S2 SoCs. Signed-off-by: Hari Nagalla Link: https://lore.kernel.org/r/20211122122726.8532-5-hnagalla@ti.com Signed-off-by: Mathieu Poirier --- drivers/remoteproc/ti_k3_r5_remoteproc.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/remoteproc/ti_k3_r5_remoteproc.c b/drivers/remoteproc/ti_k3_r5_remoteproc.c index 6499302d00c3..969531c05b13 100644 --- a/drivers/remoteproc/ti_k3_r5_remoteproc.c +++ b/drivers/remoteproc/ti_k3_r5_remoteproc.c @@ -1535,7 +1535,7 @@ static const struct k3_r5_soc_data am65_j721e_soc_data = { .single_cpu_mode = false, }; -static const struct k3_r5_soc_data j7200_soc_data = { +static const struct k3_r5_soc_data j7200_j721s2_soc_data = { .tcm_is_double = true, .tcm_ecc_autoinit = true, .single_cpu_mode = false, @@ -1550,8 +1550,9 @@ static const struct k3_r5_soc_data am64_soc_data = { static const struct of_device_id k3_r5_of_match[] = { { .compatible = "ti,am654-r5fss", .data = &am65_j721e_soc_data, }, { .compatible = "ti,j721e-r5fss", .data = &am65_j721e_soc_data, }, - { .compatible = "ti,j7200-r5fss", .data = &j7200_soc_data, }, + { .compatible = "ti,j7200-r5fss", .data = &j7200_j721s2_soc_data, }, { .compatible = "ti,am64-r5fss", .data = &am64_soc_data, }, + { .compatible = "ti,j721s2-r5fss", .data = &j7200_j721s2_soc_data, }, { /* sentinel */ }, }; MODULE_DEVICE_TABLE(of, k3_r5_of_match); From c768968f134b9e7ab8a584ad19030a8d2c7eca2f Mon Sep 17 00:00:00 2001 From: Lars-Peter Clausen Date: Sun, 5 Dec 2021 12:13:49 +0100 Subject: [PATCH 028/493] remoteproc: ingenic: Request IRQ disabled The ingenic remoteproc driver requests its IRQ and then immediately disables it. The disable is necessary since irq_request() normally enables the IRQ. But there is a new flag IRQF_NO_AUTOEN that when specified keeps the IRQ disabled. Use this new flag rather than calling disable_irq(). This slightly reduce the boilerplate code and also avoids a theoretical race condition where the IRQ could fire between irq_request() and disable_irq(). Signed-off-by: Lars-Peter Clausen Acked-by: Paul Cercueil Link: https://lore.kernel.org/r/20211205111349.51213-1-lars@metafoo.de Signed-off-by: Mathieu Poirier --- drivers/remoteproc/ingenic_rproc.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/remoteproc/ingenic_rproc.c b/drivers/remoteproc/ingenic_rproc.c index a356738160a4..9902cce28692 100644 --- a/drivers/remoteproc/ingenic_rproc.c +++ b/drivers/remoteproc/ingenic_rproc.c @@ -218,14 +218,13 @@ static int ingenic_rproc_probe(struct platform_device *pdev) if (vpu->irq < 0) return vpu->irq; - ret = devm_request_irq(dev, vpu->irq, vpu_interrupt, 0, "VPU", rproc); + ret = devm_request_irq(dev, vpu->irq, vpu_interrupt, IRQF_NO_AUTOEN, + "VPU", rproc); if (ret < 0) { dev_err(dev, "Failed to request IRQ\n"); return ret; } - disable_irq(vpu->irq); - ret = devm_rproc_add(dev, rproc); if (ret) { dev_err(dev, "Failed to register remote processor\n"); From b85a4d9628341c6a790354a28fd45c2cf7741273 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Mon, 29 Nov 2021 17:27:18 -0800 Subject: [PATCH 029/493] Input: palmas-pwrbutton - make a couple of arrays static const Don't populate a couple of arrays on the stack but instead make them static const. Also makes the object code smaller by a few hundred bytes. Signed-off-by: Colin Ian King Link: https://lore.kernel.org/r/20211129231749.619469-1-colin.i.king@gmail.com Signed-off-by: Dmitry Torokhov --- drivers/input/misc/palmas-pwrbutton.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/input/misc/palmas-pwrbutton.c b/drivers/input/misc/palmas-pwrbutton.c index 1e71336f5cf9..2213e06b611d 100644 --- a/drivers/input/misc/palmas-pwrbutton.c +++ b/drivers/input/misc/palmas-pwrbutton.c @@ -116,8 +116,8 @@ static void palmas_pwron_params_ofinit(struct device *dev, struct device_node *np; u32 val; int i, error; - u8 lpk_times[] = { 6, 8, 10, 12 }; - int pwr_on_deb_ms[] = { 15, 100, 500, 1000 }; + static const u8 lpk_times[] = { 6, 8, 10, 12 }; + static const int pwr_on_deb_ms[] = { 15, 100, 500, 1000 }; memset(config, 0, sizeof(*config)); From 8c374ef45416281c7172dc29ed438dfb445795f1 Mon Sep 17 00:00:00 2001 From: Charles Keepax Date: Mon, 6 Dec 2021 23:23:00 -0800 Subject: [PATCH 030/493] Input: ff-core - correct magnitude setting for rumble compatibility When converting a rumble into a periodic effect, for compatibility, the magnitude is effectively calculated using: magnitude = max(strong_rubble / 3 + weak_rubble / 6, 0x7fff); The rumble magnitudes are both u16 and the resulting magnitude is s16. The max is presumably an attempt to limit the result of the calculation to the maximum possible magnitude for the s16 result, and thus should be a min. However in the case of strong = weak = 0xffff, the result of the first part of the calculation is 0x7fff, meaning that the min would be redundant anyway, so simply remove the current max. Signed-off-by: Charles Keepax Link: https://lore.kernel.org/r/20211130135039.13726-1-ckeepax@opensource.cirrus.com Signed-off-by: Dmitry Torokhov --- drivers/input/ff-core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/input/ff-core.c b/drivers/input/ff-core.c index 1cf5deda06e1..fa8d1a466014 100644 --- a/drivers/input/ff-core.c +++ b/drivers/input/ff-core.c @@ -67,7 +67,7 @@ static int compat_effect(struct ff_device *ff, struct ff_effect *effect) effect->type = FF_PERIODIC; effect->u.periodic.waveform = FF_SINE; effect->u.periodic.period = 50; - effect->u.periodic.magnitude = max(magnitude, 0x7fff); + effect->u.periodic.magnitude = magnitude; effect->u.periodic.offset = 0; effect->u.periodic.phase = 0; effect->u.periodic.envelope.attack_length = 0; From 5504f67944484495a5d8504d11fb998af05fe248 Mon Sep 17 00:00:00 2001 From: Marco Elver Date: Mon, 15 Nov 2021 12:28:23 +0100 Subject: [PATCH 031/493] perf test sigtrap: Add basic stress test for sigtrap handling Add basic stress test for sigtrap handling as a perf tool built-in test. This allows sanity checking the basic sigtrap functionality from within the perf tool. Committer notes: Reported that !root was getting -EPERM, applied a fixup from Marco to set .exclude_{hv,kernel} that made it work. Signed-off-by: Marco Elver Tested-by: Arnaldo Carvalho de Melo Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Fabian Hemmer Cc: Ian Rogers Cc: Jiri Olsa Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: kasan-dev@googlegroups.com Link: http://lore.kernel.org/lkml/20211115112822.4077224-1-elver@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/Build | 1 + tools/perf/tests/builtin-test.c | 1 + tools/perf/tests/sigtrap.c | 156 ++++++++++++++++++++++++++++++++ tools/perf/tests/tests.h | 1 + 4 files changed, 159 insertions(+) create mode 100644 tools/perf/tests/sigtrap.c diff --git a/tools/perf/tests/Build b/tools/perf/tests/Build index 803ca426f8e6..af2b37ef7c70 100644 --- a/tools/perf/tests/Build +++ b/tools/perf/tests/Build @@ -65,6 +65,7 @@ perf-y += pe-file-parsing.o perf-y += expand-cgroup.o perf-y += perf-time-to-tsc.o perf-y += dlfilter-test.o +perf-y += sigtrap.o $(OUTPUT)tests/llvm-src-base.c: tests/bpf-script-example.c tests/Build $(call rule_mkdir) diff --git a/tools/perf/tests/builtin-test.c b/tools/perf/tests/builtin-test.c index 8cb5a1c3489e..f1e6d2a3a578 100644 --- a/tools/perf/tests/builtin-test.c +++ b/tools/perf/tests/builtin-test.c @@ -107,6 +107,7 @@ static struct test_suite *generic_tests[] = { &suite__expand_cgroup_events, &suite__perf_time_to_tsc, &suite__dlfilter, + &suite__sigtrap, NULL, }; diff --git a/tools/perf/tests/sigtrap.c b/tools/perf/tests/sigtrap.c new file mode 100644 index 000000000000..de409f21f952 --- /dev/null +++ b/tools/perf/tests/sigtrap.c @@ -0,0 +1,156 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Basic test for sigtrap support. + * + * Copyright (C) 2021, Google LLC. + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "cloexec.h" +#include "debug.h" +#include "event.h" +#include "tests.h" +#include "../perf-sys.h" + +#define NUM_THREADS 5 + +static struct { + int tids_want_signal; /* Which threads still want a signal. */ + int signal_count; /* Sanity check number of signals received. */ + volatile int iterate_on; /* Variable to set breakpoint on. */ + siginfo_t first_siginfo; /* First observed siginfo_t. */ +} ctx; + +#define TEST_SIG_DATA (~(unsigned long)(&ctx.iterate_on)) + +static struct perf_event_attr make_event_attr(void) +{ + struct perf_event_attr attr = { + .type = PERF_TYPE_BREAKPOINT, + .size = sizeof(attr), + .sample_period = 1, + .disabled = 1, + .bp_addr = (unsigned long)&ctx.iterate_on, + .bp_type = HW_BREAKPOINT_RW, + .bp_len = HW_BREAKPOINT_LEN_1, + .inherit = 1, /* Children inherit events ... */ + .inherit_thread = 1, /* ... but only cloned with CLONE_THREAD. */ + .remove_on_exec = 1, /* Required by sigtrap. */ + .sigtrap = 1, /* Request synchronous SIGTRAP on event. */ + .sig_data = TEST_SIG_DATA, + .exclude_kernel = 1, /* To allow */ + .exclude_hv = 1, /* running as !root */ + }; + return attr; +} + +static void +sigtrap_handler(int signum __maybe_unused, siginfo_t *info, void *ucontext __maybe_unused) +{ + if (!__atomic_fetch_add(&ctx.signal_count, 1, __ATOMIC_RELAXED)) + ctx.first_siginfo = *info; + __atomic_fetch_sub(&ctx.tids_want_signal, syscall(SYS_gettid), __ATOMIC_RELAXED); +} + +static void *test_thread(void *arg) +{ + pthread_barrier_t *barrier = (pthread_barrier_t *)arg; + pid_t tid = syscall(SYS_gettid); + int i; + + pthread_barrier_wait(barrier); + + __atomic_fetch_add(&ctx.tids_want_signal, tid, __ATOMIC_RELAXED); + for (i = 0; i < ctx.iterate_on - 1; i++) + __atomic_fetch_add(&ctx.tids_want_signal, tid, __ATOMIC_RELAXED); + + return NULL; +} + +static int run_test_threads(pthread_t *threads, pthread_barrier_t *barrier) +{ + int i; + + pthread_barrier_wait(barrier); + for (i = 0; i < NUM_THREADS; i++) + TEST_ASSERT_EQUAL("pthread_join() failed", pthread_join(threads[i], NULL), 0); + + return TEST_OK; +} + +static int run_stress_test(int fd, pthread_t *threads, pthread_barrier_t *barrier) +{ + int ret; + + ctx.iterate_on = 3000; + + TEST_ASSERT_EQUAL("misfired signal?", ctx.signal_count, 0); + TEST_ASSERT_EQUAL("enable failed", ioctl(fd, PERF_EVENT_IOC_ENABLE, 0), 0); + ret = run_test_threads(threads, barrier); + TEST_ASSERT_EQUAL("disable failed", ioctl(fd, PERF_EVENT_IOC_DISABLE, 0), 0); + + TEST_ASSERT_EQUAL("unexpected sigtraps", ctx.signal_count, NUM_THREADS * ctx.iterate_on); + TEST_ASSERT_EQUAL("missing signals or incorrectly delivered", ctx.tids_want_signal, 0); + TEST_ASSERT_VAL("unexpected si_addr", ctx.first_siginfo.si_addr == &ctx.iterate_on); +#if 0 /* FIXME: enable when libc's signal.h has si_perf_{type,data} */ + TEST_ASSERT_EQUAL("unexpected si_perf_type", ctx.first_siginfo.si_perf_type, + PERF_TYPE_BREAKPOINT); + TEST_ASSERT_EQUAL("unexpected si_perf_data", ctx.first_siginfo.si_perf_data, + TEST_SIG_DATA); +#endif + + return ret; +} + +static int test__sigtrap(struct test_suite *test __maybe_unused, int subtest __maybe_unused) +{ + struct perf_event_attr attr = make_event_attr(); + struct sigaction action = {}; + struct sigaction oldact; + pthread_t threads[NUM_THREADS]; + pthread_barrier_t barrier; + int i, fd, ret = TEST_FAIL; + + pthread_barrier_init(&barrier, NULL, NUM_THREADS + 1); + + action.sa_flags = SA_SIGINFO | SA_NODEFER; + action.sa_sigaction = sigtrap_handler; + sigemptyset(&action.sa_mask); + if (sigaction(SIGTRAP, &action, &oldact)) { + pr_debug("FAILED sigaction()\n"); + goto out; + } + + fd = sys_perf_event_open(&attr, 0, -1, -1, perf_event_open_cloexec_flag()); + if (fd < 0) { + pr_debug("FAILED sys_perf_event_open()\n"); + goto out_restore_sigaction; + } + + for (i = 0; i < NUM_THREADS; i++) { + if (pthread_create(&threads[i], NULL, test_thread, &barrier)) { + pr_debug("FAILED pthread_create()"); + goto out_close_perf_event; + } + } + + ret = run_stress_test(fd, threads, &barrier); + +out_close_perf_event: + close(fd); +out_restore_sigaction: + sigaction(SIGTRAP, &oldact, NULL); +out: + pthread_barrier_destroy(&barrier); + return ret; +} + +DEFINE_SUITE("Sigtrap", sigtrap); diff --git a/tools/perf/tests/tests.h b/tools/perf/tests/tests.h index 8f65098110fc..5bbb8f6a48fc 100644 --- a/tools/perf/tests/tests.h +++ b/tools/perf/tests/tests.h @@ -146,6 +146,7 @@ DECLARE_SUITE(pe_file_parsing); DECLARE_SUITE(expand_cgroup_events); DECLARE_SUITE(perf_time_to_tsc); DECLARE_SUITE(dlfilter); +DECLARE_SUITE(sigtrap); /* * PowerPC and S390 do not support creation of instruction breakpoints using the From e9c08f722924c58041d2e0d90ea27140a4625776 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 16 Nov 2021 09:51:48 -0300 Subject: [PATCH 032/493] perf test sigtrap: Print errno string when failing Helps a bit the user figuring out why it is failing: Before: $ perf test sigtrap 73: Sigtrap : FAILED! $ perf test -v sigtrap 73: Sigtrap : --- start --- test child forked, pid 3816772 FAILED sys_perf_event_open() test child finished with -1 ---- end ---- Sigtrap: FAILED! $ After: $ perf test sigtrap 73: Sigtrap : FAILED! $ perf test -v sigtrap 73: Sigtrap : --- start --- test child forked, pid 3816772 FAILED sys_perf_event_open(): Permission denied test child finished with -1 ---- end ---- Sigtrap: FAILED! $ Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Fabian Hemmer Cc: Ian Rogers Cc: Ingo Molnar Cc: Jiri Olsa Cc: Marco Elver Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: kasan-dev@googlegroups.com Link: http://lore.kernel.org/lkml/YZOpSVOCXe0zWeRs@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/sigtrap.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/tools/perf/tests/sigtrap.c b/tools/perf/tests/sigtrap.c index de409f21f952..1004bf0e7cc9 100644 --- a/tools/perf/tests/sigtrap.c +++ b/tools/perf/tests/sigtrap.c @@ -5,9 +5,11 @@ * Copyright (C) 2021, Google LLC. */ +#include #include #include #include +#include #include #include #include @@ -117,6 +119,7 @@ static int test__sigtrap(struct test_suite *test __maybe_unused, int subtest __m struct sigaction oldact; pthread_t threads[NUM_THREADS]; pthread_barrier_t barrier; + char sbuf[STRERR_BUFSIZE]; int i, fd, ret = TEST_FAIL; pthread_barrier_init(&barrier, NULL, NUM_THREADS + 1); @@ -125,19 +128,19 @@ static int test__sigtrap(struct test_suite *test __maybe_unused, int subtest __m action.sa_sigaction = sigtrap_handler; sigemptyset(&action.sa_mask); if (sigaction(SIGTRAP, &action, &oldact)) { - pr_debug("FAILED sigaction()\n"); + pr_debug("FAILED sigaction(): %s\n", str_error_r(errno, sbuf, sizeof(sbuf))); goto out; } fd = sys_perf_event_open(&attr, 0, -1, -1, perf_event_open_cloexec_flag()); if (fd < 0) { - pr_debug("FAILED sys_perf_event_open()\n"); + pr_debug("FAILED sys_perf_event_open(): %s\n", str_error_r(errno, sbuf, sizeof(sbuf))); goto out_restore_sigaction; } for (i = 0; i < NUM_THREADS; i++) { if (pthread_create(&threads[i], NULL, test_thread, &barrier)) { - pr_debug("FAILED pthread_create()"); + pr_debug("FAILED pthread_create(): %s\n", str_error_r(errno, sbuf, sizeof(sbuf))); goto out_close_perf_event; } } From c77a78c29177f9a614915e5158a7b6bb89e0e8db Mon Sep 17 00:00:00 2001 From: John Garry Date: Sat, 30 Oct 2021 00:30:41 +0800 Subject: [PATCH 033/493] tools build: Enable warnings through HOSTCFLAGS The tools build system uses KBUILD_HOSTCFLAGS symbol for obvious purposes. However this is not set for anything under tools/ As such, host tools apps built have no compiler warnings enabled. Declare HOSTCFLAGS for perf tools build, and also use that symbol in declaration of host_c_flags. HOSTCFLAGS comes from EXTRA_WARNINGS, which is independent of target platform/arch warning flags. Suggested-by: Jiri Olsa Signed-off-by: John Garry Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Ian Rogers Cc: Ingo Molnar Cc: Laura Abbott Cc: Masahiro Yamada Cc: Namhyung Kim Cc: Peter Zijlstra Link: https://lore.kernel.org/r/1635525041-151876-1-git-send-email-john.garry@huawei.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/build/Build.include | 2 +- tools/perf/Makefile.config | 5 +++++ tools/perf/Makefile.perf | 2 +- 3 files changed, 7 insertions(+), 2 deletions(-) diff --git a/tools/build/Build.include b/tools/build/Build.include index 2cf3b1bde86e..c2a95ab47379 100644 --- a/tools/build/Build.include +++ b/tools/build/Build.include @@ -99,7 +99,7 @@ cxx_flags = -Wp,-MD,$(depfile) -Wp,-MT,$@ $(CXXFLAGS) -D"BUILD_STR(s)=\#s" $(CXX ### ## HOSTCC C flags -host_c_flags = -Wp,-MD,$(depfile) -Wp,-MT,$@ $(KBUILD_HOSTCFLAGS) -D"BUILD_STR(s)=\#s" $(HOSTCFLAGS_$(basetarget).o) $(HOSTCFLAGS_$(obj)) +host_c_flags = -Wp,-MD,$(depfile) -Wp,-MT,$@ $(HOSTCFLAGS) -D"BUILD_STR(s)=\#s" $(HOSTCFLAGS_$(basetarget).o) $(HOSTCFLAGS_$(obj)) # output directory for tests below TMPOUT = .tmp_$$$$ diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config index 3df74cf5651a..94bb53b0cebd 100644 --- a/tools/perf/Makefile.config +++ b/tools/perf/Makefile.config @@ -17,6 +17,7 @@ detected = $(shell echo "$(1)=y" >> $(OUTPUT).config-detected) detected_var = $(shell echo "$(1)=$($(1))" >> $(OUTPUT).config-detected) CFLAGS := $(EXTRA_CFLAGS) $(filter-out -Wnested-externs,$(EXTRA_WARNINGS)) +HOSTCFLAGS := $(filter-out -Wnested-externs,$(EXTRA_WARNINGS)) include $(srctree)/tools/scripts/Makefile.arch @@ -211,6 +212,7 @@ endif ifneq ($(WERROR),0) CORE_CFLAGS += -Werror CXXFLAGS += -Werror + HOSTCFLAGS += -Werror endif ifndef DEBUG @@ -290,6 +292,9 @@ CXXFLAGS += -ggdb3 CXXFLAGS += -funwind-tables CXXFLAGS += -Wno-strict-aliasing +HOSTCFLAGS += -Wall +HOSTCFLAGS += -Wextra + # Enforce a non-executable stack, as we may regress (again) in the future by # adding assembler files missing the .GNU-stack linker note. LDFLAGS += -Wl,-z,noexecstack diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index 80522bcfafe0..164a37523781 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ -226,7 +226,7 @@ else endif export srctree OUTPUT RM CC CXX LD AR CFLAGS CXXFLAGS V BISON FLEX AWK -export HOSTCC HOSTLD HOSTAR +export HOSTCC HOSTLD HOSTAR HOSTCFLAGS include $(srctree)/tools/build/Makefile.include From 9a5b2d1afa9f888335ab63e922ba5eed31383020 Mon Sep 17 00:00:00 2001 From: Shunsuke Nakamura Date: Tue, 9 Nov 2021 17:58:29 +0900 Subject: [PATCH 034/493] libperf: Adopt perf_counts_values__scale() from tools/perf/util Move perf_counts_values__scale() from tools/perf/util to tools/lib/perf so that it can be used with libperf. Committer notes: As noted by Jiri, use __s8 instead of s8 on the exported function. Signed-off-by: Shunsuke Nakamura Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Ingo Molnar Cc: Jiri Olsa Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Rob Herring Link: https://lore.kernel.org/r/20211109085831.3770594-2-nakamura.shun@fujitsu.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/lib/perf/evsel.c | 19 +++++++++++++++++++ tools/lib/perf/include/perf/evsel.h | 4 ++++ tools/lib/perf/libperf.map | 1 + tools/perf/util/evsel.c | 19 ------------------- tools/perf/util/evsel.h | 3 --- 5 files changed, 24 insertions(+), 22 deletions(-) diff --git a/tools/lib/perf/evsel.c b/tools/lib/perf/evsel.c index 8441e3e1aaac..68f83d2c27c1 100644 --- a/tools/lib/perf/evsel.c +++ b/tools/lib/perf/evsel.c @@ -431,3 +431,22 @@ void perf_evsel__free_id(struct perf_evsel *evsel) zfree(&evsel->id); evsel->ids = 0; } + +void perf_counts_values__scale(struct perf_counts_values *count, + bool scale, __s8 *pscaled) +{ + s8 scaled = 0; + + if (scale) { + if (count->run == 0) { + scaled = -1; + count->val = 0; + } else if (count->run < count->ena) { + scaled = 1; + count->val = (u64)((double)count->val * count->ena / count->run); + } + } + + if (pscaled) + *pscaled = scaled; +} diff --git a/tools/lib/perf/include/perf/evsel.h b/tools/lib/perf/include/perf/evsel.h index 60eae25076d3..f401c7484bec 100644 --- a/tools/lib/perf/include/perf/evsel.h +++ b/tools/lib/perf/include/perf/evsel.h @@ -4,6 +4,8 @@ #include #include +#include +#include struct perf_evsel; struct perf_event_attr; @@ -39,5 +41,7 @@ LIBPERF_API int perf_evsel__disable_cpu(struct perf_evsel *evsel, int cpu); LIBPERF_API struct perf_cpu_map *perf_evsel__cpus(struct perf_evsel *evsel); LIBPERF_API struct perf_thread_map *perf_evsel__threads(struct perf_evsel *evsel); LIBPERF_API struct perf_event_attr *perf_evsel__attr(struct perf_evsel *evsel); +LIBPERF_API void perf_counts_values__scale(struct perf_counts_values *count, + bool scale, __s8 *pscaled); #endif /* __LIBPERF_EVSEL_H */ diff --git a/tools/lib/perf/libperf.map b/tools/lib/perf/libperf.map index 71468606e8a7..5979bf92d98f 100644 --- a/tools/lib/perf/libperf.map +++ b/tools/lib/perf/libperf.map @@ -50,6 +50,7 @@ LIBPERF_0.0.1 { perf_mmap__read_init; perf_mmap__read_done; perf_mmap__read_event; + perf_counts_values__scale; local: *; }; diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index ac0127be0459..656c30b988ce 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -1476,25 +1476,6 @@ void evsel__compute_deltas(struct evsel *evsel, int cpu, int thread, count->run = count->run - tmp.run; } -void perf_counts_values__scale(struct perf_counts_values *count, - bool scale, s8 *pscaled) -{ - s8 scaled = 0; - - if (scale) { - if (count->run == 0) { - scaled = -1; - count->val = 0; - } else if (count->run < count->ena) { - scaled = 1; - count->val = (u64)((double) count->val * count->ena / count->run); - } - } - - if (pscaled) - *pscaled = scaled; -} - static int evsel__read_one(struct evsel *evsel, int cpu, int thread) { struct perf_counts_values *count = perf_counts(evsel->counts, cpu, thread); diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h index 29d49a8c1e92..99aa3363def7 100644 --- a/tools/perf/util/evsel.h +++ b/tools/perf/util/evsel.h @@ -195,9 +195,6 @@ static inline int evsel__nr_cpus(struct evsel *evsel) return evsel__cpus(evsel)->nr; } -void perf_counts_values__scale(struct perf_counts_values *count, - bool scale, s8 *pscaled); - void evsel__compute_deltas(struct evsel *evsel, int cpu, int thread, struct perf_counts_values *count); From f2c4dcf191904d28d710290eea4a623710eee57c Mon Sep 17 00:00:00 2001 From: Shunsuke Nakamura Date: Tue, 9 Nov 2021 17:58:30 +0900 Subject: [PATCH 035/493] libperf: Remove scaling process from perf_mmap__read_self() Remove the scaling process from perf_mmap__read_self(), and unify the counters that can be obtained from perf_evsel__read() to "no scaling". Signed-off-by: Shunsuke Nakamura Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Ingo Molnar Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Rob Herring Link: https://lore.kernel.org/r/20211109085831.3770594-3-nakamura.shun@fujitsu.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/lib/perf/mmap.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/tools/lib/perf/mmap.c b/tools/lib/perf/mmap.c index c89dfa5f67b3..aaa457904008 100644 --- a/tools/lib/perf/mmap.c +++ b/tools/lib/perf/mmap.c @@ -353,8 +353,6 @@ int perf_mmap__read_self(struct perf_mmap *map, struct perf_counts_values *count count->ena += delta; if (idx) count->run += delta; - - cnt = mul_u64_u64_div64(cnt, count->ena, count->run); } count->val = cnt; From a7f3713f6bf207e6d8dd484704dba6089f7ad8db Mon Sep 17 00:00:00 2001 From: Shunsuke Nakamura Date: Tue, 9 Nov 2021 17:58:31 +0900 Subject: [PATCH 036/493] libperf tests: Add test_stat_multiplexing test Adds a test for a counter obtained using read() system call during multiplexing. $ sudo make tests -C ./tools/lib/perf/ V=1 make: Entering directory '/home/nakamura/build_work/build_kernel/linux_kernel/linux/tools/lib/perf' make -f /home/nakamura/build_work/build_kernel/linux_kernel/linux/tools/build/Makefile.build dir=. obj=libperf make -C /home/nakamura/build_work/build_kernel/linux_kernel/linux/tools/lib/api/ O= libapi.a make -f /home/nakamura/build_work/build_kernel/linux_kernel/linux/tools/build/Makefile.build dir=./fd obj=libapi make -f /home/nakamura/build_work/build_kernel/linux_kernel/linux/tools/build/Makefile.build dir=./fs obj=libapi make -f /home/nakamura/build_work/build_kernel/linux_kernel/linux/tools/build/Makefile.build dir=. obj=tests make -f /home/nakamura/build_work/build_kernel/linux_kernel/linux/tools/build/Makefile.build dir=./tests obj=tests running static: - running tests/test-cpumap.c...OK - running tests/test-threadmap.c...OK - running tests/test-evlist.c... Event 0 -- Raw count = 298049842, run = 270269503, enable = 456262127 Scaled count = 503160191 (59.24%, 270269503/456262127) Event 1 -- Raw count = 299134173, run = 271075173, enable = 456257234 Scaled count = 503484435 (59.41%, 271075173/456257234) Event 2 -- Raw count = 300461996, run = 272069283, enable = 456253417 Scaled count = 503867290 (59.63%, 272069283/456253417) Event 3 -- Raw count = 301308704, run = 273063387, enable = 456249352 Scaled count = 503443183 (59.85%, 273063387/456249352) Event 4 -- Raw count = 302531164, run = 274102932, enable = 456244712 Scaled count = 503563543 (60.08%, 274102932/456244712) Event 5 -- Raw count = 303710254, run = 275406214, enable = 456228165 Scaled count = 503115633 (60.37%, 275406214/456228165) Event 6 -- Raw count = 304531302, run = 276396076, enable = 456221130 Scaled count = 502661313 (60.58%, 276396076/456221130) Event 7 -- Raw count = 304486460, run = 276601890, enable = 456213754 Scaled count = 502205212 (60.63%, 276601890/456213754) Event 8 -- Raw count = 304116681, run = 276631326, enable = 456205562 Scaled count = 501532936 (60.64%, 276631326/456205562) Event 9 -- Raw count = 303567766, run = 276188567, enable = 456196839 Scaled count = 501420666 (60.54%, 276188567/456196839) Event 10 -- Raw count = 302238014, run = 275144001, enable = 456185300 Scaled count = 501106833 (60.31%, 275144001/456185300) Event 11 -- Raw count = 300805716, run = 273824589, enable = 456175608 Scaled count = 501124573 (60.03%, 273824589/456175608) Event 12 -- Raw count = 299959051, run = 272834556, enable = 456166593 Scaled count = 501517477 (59.81%, 272834556/456166593) Event 13 -- Raw count = 299037090, run = 271820805, enable = 456157086 Scaled count = 501830195 (59.59%, 271820805/456157086) Event 14 -- Raw count = 298327042, run = 270784311, enable = 456147546 Scaled count = 502544433 (59.36%, 270784311/456147546) Expected: 501614268 High: 503867290 Low: 298049842 Average: 502438527 Average Error = 0.16% OK - running tests/test-evsel.c... loop = 65536, count = 328182 loop = 131072, count = 660214 loop = 262144, count = 1315534 loop = 524288, count = 2635364 loop = 1048576, count = 5271971 loop = 65536, count = 491952 loop = 131072, count = 850061 loop = 262144, count = 1648608 loop = 524288, count = 3162059 loop = 1048576, count = 6353393 OK running dynamic: - running tests/test-cpumap.c...OK - running tests/test-threadmap.c...OK - running tests/test-evlist.c... Event 0 -- Raw count = 300218292, run = 297528154, enable = 496789343 Scaled count = 501281125 (59.89%, 297528154/496789343) Event 1 -- Raw count = 301438606, run = 298515328, enable = 496784768 Scaled count = 501649643 (60.09%, 298515328/496784768) Event 2 -- Raw count = 302342618, run = 298798983, enable = 496782015 Scaled count = 502673648 (60.15%, 298798983/496782015) Event 3 -- Raw count = 303132319, run = 299230407, enable = 496778508 Scaled count = 503256412 (60.23%, 299230407/496778508) Event 4 -- Raw count = 302758195, run = 299218047, enable = 496774243 Scaled count = 502651743 (60.23%, 299218047/496774243) Event 5 -- Raw count = 303158458, run = 299204274, enable = 496769146 Scaled count = 503334281 (60.23%, 299204274/496769146) Event 6 -- Raw count = 303471397, run = 299197479, enable = 496763124 Scaled count = 503859189 (60.23%, 299197479/496763124) Event 7 -- Raw count = 303583387, run = 299196861, enable = 496756458 Scaled count = 504039405 (60.23%, 299196861/496756458) Event 8 -- Raw count = 303096897, run = 299186924, enable = 496748667 Scaled count = 503240507 (60.23%, 299186924/496748667) Event 9 -- Raw count = 301424173, run = 297845086, enable = 496739994 Scaled count = 502709122 (59.96%, 297845086/496739994) Event 10 -- Raw count = 300876415, run = 296851339, enable = 496729034 Scaled count = 503464297 (59.76%, 296851339/496729034) Event 11 -- Raw count = 300239338, run = 296547963, enable = 496719538 Scaled count = 502902612 (59.70%, 296547963/496719538) Event 12 -- Raw count = 299751948, run = 296547195, enable = 496710036 Scaled count = 502077926 (59.70%, 296547195/496710036) Event 13 -- Raw count = 299341883, run = 296549981, enable = 496700423 Scaled count = 501376663 (59.70%, 296549981/496700423) Event 14 -- Raw count = 299145476, run = 296561684, enable = 496690949 Scaled count = 501018366 (59.71%, 296561684/496690949) Expected: 501669431 High: 504039405 Low: 300218292 Average: 502635662 Average Error = 0.19% OK - running tests/test-evsel.c... loop = 65536, count = 329275 loop = 131072, count = 664638 loop = 262144, count = 1315367 loop = 524288, count = 2629617 loop = 1048576, count = 5273657 loop = 65536, count = 459641 loop = 131072, count = 978402 loop = 262144, count = 1581219 loop = 524288, count = 3774908 loop = 1048576, count = 7694417 OK make: Leaving directory '/home/nakamura/build_work/build_kernel/linux_kernel/linux/tools/lib/perf' Signed-off-by: Shunsuke Nakamura Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Ingo Molnar Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Rob Herring Link: https://lore.kernel.org/r/20211109085831.3770594-4-nakamura.shun@fujitsu.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/lib/perf/tests/test-evlist.c | 157 +++++++++++++++++++++++++++++ 1 file changed, 157 insertions(+) diff --git a/tools/lib/perf/tests/test-evlist.c b/tools/lib/perf/tests/test-evlist.c index ce91a582f0e4..520a78267743 100644 --- a/tools/lib/perf/tests/test-evlist.c +++ b/tools/lib/perf/tests/test-evlist.c @@ -21,6 +21,9 @@ #include "tests.h" #include +#define EVENT_NUM 15 +#define WAIT_COUNT 100000000UL + static int libperf_print(enum libperf_print_level level, const char *fmt, va_list ap) { @@ -413,6 +416,159 @@ static int test_mmap_cpus(void) return 0; } +static double display_error(long long average, + long long high, + long long low, + long long expected) +{ + double error; + + error = (((double)average - expected) / expected) * 100.0; + + __T_VERBOSE(" Expected: %lld\n", expected); + __T_VERBOSE(" High: %lld Low: %lld Average: %lld\n", + high, low, average); + + __T_VERBOSE(" Average Error = %.2f%%\n", error); + + return error; +} + +static int test_stat_multiplexing(void) +{ + struct perf_counts_values expected_counts = { .val = 0 }; + struct perf_counts_values counts[EVENT_NUM] = {{ .val = 0 },}; + struct perf_thread_map *threads; + struct perf_evlist *evlist; + struct perf_evsel *evsel; + struct perf_event_attr attr = { + .type = PERF_TYPE_HARDWARE, + .config = PERF_COUNT_HW_INSTRUCTIONS, + .read_format = PERF_FORMAT_TOTAL_TIME_ENABLED | + PERF_FORMAT_TOTAL_TIME_RUNNING, + .disabled = 1, + }; + int err, i, nonzero = 0; + unsigned long count; + long long max = 0, min = 0, avg = 0; + double error = 0.0; + s8 scaled = 0; + + /* read for non-multiplexing event count */ + threads = perf_thread_map__new_dummy(); + __T("failed to create threads", threads); + + perf_thread_map__set_pid(threads, 0, 0); + + evsel = perf_evsel__new(&attr); + __T("failed to create evsel", evsel); + + err = perf_evsel__open(evsel, NULL, threads); + __T("failed to open evsel", err == 0); + + err = perf_evsel__enable(evsel); + __T("failed to enable evsel", err == 0); + + /* wait loop */ + count = WAIT_COUNT; + while (count--) + ; + + perf_evsel__read(evsel, 0, 0, &expected_counts); + __T("failed to read value for evsel", expected_counts.val != 0); + __T("failed to read non-multiplexing event count", + expected_counts.ena == expected_counts.run); + + err = perf_evsel__disable(evsel); + __T("failed to enable evsel", err == 0); + + perf_evsel__close(evsel); + perf_evsel__delete(evsel); + + perf_thread_map__put(threads); + + /* read for multiplexing event count */ + threads = perf_thread_map__new_dummy(); + __T("failed to create threads", threads); + + perf_thread_map__set_pid(threads, 0, 0); + + evlist = perf_evlist__new(); + __T("failed to create evlist", evlist); + + for (i = 0; i < EVENT_NUM; i++) { + evsel = perf_evsel__new(&attr); + __T("failed to create evsel", evsel); + + perf_evlist__add(evlist, evsel); + } + perf_evlist__set_maps(evlist, NULL, threads); + + err = perf_evlist__open(evlist); + __T("failed to open evsel", err == 0); + + perf_evlist__enable(evlist); + + /* wait loop */ + count = WAIT_COUNT; + while (count--) + ; + + i = 0; + perf_evlist__for_each_evsel(evlist, evsel) { + perf_evsel__read(evsel, 0, 0, &counts[i]); + __T("failed to read value for evsel", counts[i].val != 0); + i++; + } + + perf_evlist__disable(evlist); + + min = counts[0].val; + for (i = 0; i < EVENT_NUM; i++) { + __T_VERBOSE("Event %2d -- Raw count = %lu, run = %lu, enable = %lu\n", + i, counts[i].val, counts[i].run, counts[i].ena); + + perf_counts_values__scale(&counts[i], true, &scaled); + if (scaled == 1) { + __T_VERBOSE("\t Scaled count = %lu (%.2lf%%, %lu/%lu)\n", + counts[i].val, + (double)counts[i].run / (double)counts[i].ena * 100.0, + counts[i].run, counts[i].ena); + } else if (scaled == -1) { + __T_VERBOSE("\t Not Runnnig\n"); + } else { + __T_VERBOSE("\t Not Scaling\n"); + } + + if (counts[i].val > max) + max = counts[i].val; + + if (counts[i].val < min) + min = counts[i].val; + + avg += counts[i].val; + + if (counts[i].val != 0) + nonzero++; + } + + if (nonzero != 0) + avg = avg / nonzero; + else + avg = 0; + + error = display_error(avg, max, min, expected_counts.val); + + __T("Error out of range!", ((error <= 1.0) && (error >= -1.0))); + + perf_evlist__close(evlist); + perf_evlist__delete(evlist); + + perf_thread_map__put(threads); + + return 0; +} + int test_evlist(int argc, char **argv) { __T_START; @@ -424,6 +580,7 @@ int test_evlist(int argc, char **argv) test_stat_thread_enable(); test_mmap_thread(); test_mmap_cpus(); + test_stat_multiplexing(); __T_END; return tests_failed == 0 ? 0 : -1; From 4edb117e6472ca0c0730887dba8b30cba0a3705e Mon Sep 17 00:00:00 2001 From: Sandipan Das Date: Tue, 23 Nov 2021 14:16:12 +0530 Subject: [PATCH 037/493] perf docs: Add info on AMD raw event encoding AMD processors have events with event select codes and unit masks larger than a byte. The core PMU, for example, uses 12-bit event select codes split between bits 0-7 and 32-35 of the PERF_CTL MSRs as can be seen from /sys/bus/event_sources/devices/cpu/format/*. The Processor Programming Reference (PPR) lists the event codes as unified 12-bit hexadecimal values instead and the split between the bits is not apparent to someone who is not aware of the layout of the PERF_CTL MSRs. 8-bit event select codes continue to work as the layout matches that of the PERF_CTL MSRs i.e. bits 0-7 for event select and 8-15 for unit mask. This adds more details in the perf man pages about using /sys/bus/event_sources/devices/*/format/* for determining the correct raw event encoding scheme. E.g. the "op_cache_hit_miss.op_cache_hit" event with code 0x28f and umask 0x03 can be programmed using its symbolic name as: $ sudo perf --debug perf-event-open stat -e op_cache_hit_miss.op_cache_hit sleep 1 ------------------------------------------------------------ perf_event_attr: type 4 size 128 config 0x20000038f sample_type IDENTIFIER read_format TOTAL_TIME_ENABLED|TOTAL_TIME_RUNNING disabled 1 inherit 1 enable_on_exec 1 exclude_guest 1 ------------------------------------------------------------ [...] One might use a simple eventsel+umask combination based on what the current man pages say and incorrectly program the event as: $ sudo perf --debug perf-event-open stat -e r0328f sleep 1 ------------------------------------------------------------ perf_event_attr: type 4 size 128 config 0x328f sample_type IDENTIFIER read_format TOTAL_TIME_ENABLED|TOTAL_TIME_RUNNING disabled 1 inherit 1 enable_on_exec 1 exclude_guest 1 ------------------------------------------------------------ [...] When it should have been based on the format from sysfs: $ cat /sys/bus/event_source/devices/cpu/format/event config:0-7,32-35 $ sudo perf --debug perf-event-open stat -e r20000038f sleep 1 ------------------------------------------------------------ perf_event_attr: type 4 size 128 config 0x20000038f sample_type IDENTIFIER read_format TOTAL_TIME_ENABLED|TOTAL_TIME_RUNNING disabled 1 inherit 1 enable_on_exec 1 exclude_guest 1 ------------------------------------------------------------ [...] Reviewed-by: Kajol Jain Signed-off-by: Sandipan Das Acked-by: Jiri Olsa Cc: Ananth Narayan Cc: Kim Phillips Cc: Ravi Bangoria Cc: Robert Richter Cc: Santosh Shukla Link: https://lore.kernel.org/r/20211123084613.243792-1-sandipan.das@amd.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-list.txt | 34 +++++++++++++++++++++++- tools/perf/Documentation/perf-record.txt | 6 +++-- tools/perf/Documentation/perf-stat.txt | 6 +++-- tools/perf/Documentation/perf-top.txt | 7 ++--- 4 files changed, 45 insertions(+), 8 deletions(-) diff --git a/tools/perf/Documentation/perf-list.txt b/tools/perf/Documentation/perf-list.txt index 4dc8d0af19df..a922a95289a9 100644 --- a/tools/perf/Documentation/perf-list.txt +++ b/tools/perf/Documentation/perf-list.txt @@ -94,7 +94,7 @@ RAW HARDWARE EVENT DESCRIPTOR Even when an event is not available in a symbolic form within perf right now, it can be encoded in a per processor specific way. -For instance For x86 CPUs NNN represents the raw register encoding with the +For instance on x86 CPUs, N is a hexadecimal value that represents the raw register encoding with the layout of IA32_PERFEVTSELx MSRs (see [Intel® 64 and IA-32 Architectures Software Developer's Manual Volume 3B: System Programming Guide] Figure 30-1 Layout of IA32_PERFEVTSELx MSRs) or AMD's PerfEvtSeln (see [AMD64 Architecture Programmer’s Manual Volume 2: System Programming], Page 344, Figure 13-7 Performance Event-Select Register (PerfEvtSeln)). @@ -126,6 +126,38 @@ It's also possible to use pmu syntax: perf record -e cpu/r1a8/ ... perf record -e cpu/r0x1a8/ ... +Some processors, like those from AMD, support event codes and unit masks +larger than a byte. In such cases, the bits corresponding to the event +configuration parameters can be seen with: + + cat /sys/bus/event_source/devices//format/ + +Example: + +If the AMD docs for an EPYC 7713 processor describe an event as: + + Event Umask Event Mask + Num. Value Mnemonic Description + + 28FH 03H op_cache_hit_miss.op_cache_hit Counts Op Cache micro-tag + hit events. + +raw encoding of 0x0328F cannot be used since the upper nibble of the +EventSelect bits have to be specified via bits 32-35 as can be seen with: + + cat /sys/bus/event_source/devices/cpu/format/event + +raw encoding of 0x20000038F should be used instead: + + perf stat -e r20000038f -a sleep 1 + perf record -e r20000038f ... + +It's also possible to use pmu syntax: + + perf record -e r20000038f -a sleep 1 + perf record -e cpu/r20000038f/ ... + perf record -e cpu/r0x20000038f/ ... + You should refer to the processor specific documentation for getting these details. Some of them are referenced in the SEE ALSO section below. diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt index 3cf7bac67239..55df7b073a55 100644 --- a/tools/perf/Documentation/perf-record.txt +++ b/tools/perf/Documentation/perf-record.txt @@ -30,8 +30,10 @@ OPTIONS - a symbolic event name (use 'perf list' to list all events) - - a raw PMU event (eventsel+umask) in the form of rNNN where NNN is a - hexadecimal event descriptor. + - a raw PMU event in the form of rN where N is a hexadecimal value + that represents the raw register encoding with the layout of the + event control registers as described by entries in + /sys/bus/event_sources/devices/cpu/format/*. - a symbolic or raw PMU event followed by an optional colon and a list of event modifiers, e.g., cpu-cycles:p. See the diff --git a/tools/perf/Documentation/perf-stat.txt b/tools/perf/Documentation/perf-stat.txt index 7e6fb7cbc0f4..604e6f2301ea 100644 --- a/tools/perf/Documentation/perf-stat.txt +++ b/tools/perf/Documentation/perf-stat.txt @@ -36,8 +36,10 @@ report:: - a symbolic event name (use 'perf list' to list all events) - - a raw PMU event (eventsel+umask) in the form of rNNN where NNN is a - hexadecimal event descriptor. + - a raw PMU event in the form of rN where N is a hexadecimal value + that represents the raw register encoding with the layout of the + event control registers as described by entries in + /sys/bus/event_sources/devices/cpu/format/*. - a symbolic or raw PMU event followed by an optional colon and a list of event modifiers, e.g., cpu-cycles:p. See the diff --git a/tools/perf/Documentation/perf-top.txt b/tools/perf/Documentation/perf-top.txt index 9898a32b8d9c..cac3dfbee7d8 100644 --- a/tools/perf/Documentation/perf-top.txt +++ b/tools/perf/Documentation/perf-top.txt @@ -38,9 +38,10 @@ Default is to monitor all CPUS. -e :: --event=:: Select the PMU event. Selection can be a symbolic event name - (use 'perf list' to list all events) or a raw PMU - event (eventsel+umask) in the form of rNNN where NNN is a - hexadecimal event descriptor. + (use 'perf list' to list all events) or a raw PMU event in the form + of rN where N is a hexadecimal value that represents the raw register + encoding with the layout of the event control registers as described + by entries in /sys/bus/event_sources/devices/cpu/format/*. -E :: --entries=:: From 7a2e14962cd43400c353cdc05550f580a284dcb9 Mon Sep 17 00:00:00 2001 From: Sandipan Das Date: Tue, 23 Nov 2021 14:16:13 +0530 Subject: [PATCH 038/493] perf docs: Update link to AMD documentation This updates the link to documentation on AMD processors. The new link points to a page where users can find the Processor Programming Reference (PPR) documents for the family and model codes corresponding to processors they are using. Signed-off-by: Sandipan Das Acked-by: Jiri Olsa Cc: Ananth Narayan Cc: Kajol Jain Cc: Kim Phillips Cc: Ravi Bangoria Cc: Robert Richter Cc: Santosh Shukla Link: https://lore.kernel.org/r/20211123084613.243792-2-sandipan.das@amd.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-list.txt | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/tools/perf/Documentation/perf-list.txt b/tools/perf/Documentation/perf-list.txt index a922a95289a9..57384a97c04f 100644 --- a/tools/perf/Documentation/perf-list.txt +++ b/tools/perf/Documentation/perf-list.txt @@ -81,7 +81,11 @@ On AMD systems it is implemented using IBS (up to precise-level 2). The precise modifier works with event types 0x76 (cpu-cycles, CPU clocks not halted) and 0xC1 (micro-ops retired). Both events map to IBS execution sampling (IBS op) with the IBS Op Counter Control bit -(IbsOpCntCtl) set respectively (see AMD64 Architecture Programmer’s +(IbsOpCntCtl) set respectively (see the +Core Complex (CCX) -> Processor x86 Core -> Instruction Based Sampling (IBS) +section of the [AMD Processor Programming Reference (PPR)] relevant to the +family, model and stepping of the processor being used). + Manual Volume 2: System Programming, 13.3 Instruction-Based Sampling). Examples to use IBS: @@ -96,8 +100,10 @@ it can be encoded in a per processor specific way. For instance on x86 CPUs, N is a hexadecimal value that represents the raw register encoding with the layout of IA32_PERFEVTSELx MSRs (see [Intel® 64 and IA-32 Architectures Software Developer's Manual Volume 3B: System Programming Guide] Figure 30-1 Layout -of IA32_PERFEVTSELx MSRs) or AMD's PerfEvtSeln (see [AMD64 Architecture Programmer’s Manual Volume 2: System Programming], Page 344, -Figure 13-7 Performance Event-Select Register (PerfEvtSeln)). +of IA32_PERFEVTSELx MSRs) or AMD's PERF_CTL MSRs (see the +Core Complex (CCX) -> Processor x86 Core -> MSR Registers section of the +[AMD Processor Programming Reference (PPR)] relevant to the family, model +and stepping of the processor being used). Note: Only the following bit fields can be set in x86 counter registers: event, umask, edge, inv, cmask. Esp. guest/host only and @@ -348,4 +354,4 @@ SEE ALSO linkperf:perf-stat[1], linkperf:perf-top[1], linkperf:perf-record[1], http://www.intel.com/sdm/[Intel® 64 and IA-32 Architectures Software Developer's Manual Volume 3B: System Programming Guide], -http://support.amd.com/us/Processor_TechDocs/24593_APM_v2.pdf[AMD64 Architecture Programmer’s Manual Volume 2: System Programming] +https://bugzilla.kernel.org/show_bug.cgi?id=206537[AMD Processor Programming Reference (PPR)] From b4515ad6e1c8b195e3bd02a5a15b1c74119ea367 Mon Sep 17 00:00:00 2001 From: Gang Li Date: Tue, 23 Nov 2021 15:40:17 +0800 Subject: [PATCH 039/493] perf trace: Enable ignore_missing_thread for trace perf already support ignore_missing_thread for -u/-p, but not yet applied to `perf trace`. This patch enables ignore_missing_thread for `perf trace`. Signed-off-by: Gang Li Cc: Alexander Shishkin Cc: Jiri Olsa Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1481538943-21874-6-git-send-email-jolsa@kernel.org Link: http://lkml.kernel.org/r/1513148513-6974-1-git-send-email-zhangmengting@huawei.com Link: http://lore.kernel.org/lkml/20211123074018.11406-1-ligang.bdlg@bytedance.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-trace.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index 0b52e08e558e..94d62a92f1a1 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -3950,6 +3950,9 @@ static int trace__run(struct trace *trace, int argc, const char **argv) evlist__add(evlist, pgfault_min); } + /* Enable ignoring missing threads when -u/-p option is defined. */ + trace->opts.ignore_missing_thread = trace->opts.target.uid != UINT_MAX || trace->opts.target.pid; + if (trace->sched && evlist__add_newtp(evlist, "sched", "sched_stat_runtime", trace__sched_stat_runtime)) goto out_error_sched_stat_runtime; From 6b6b16b3bb612757f7bc697496b9f5d6765512a6 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Tue, 23 Nov 2021 17:52:26 -0800 Subject: [PATCH 040/493] perf metric: Reduce multiplexing with duration_time It is common to use the same counters with and without duration_time. The ID sharing code treats duration_time as if it were a hardware event placed in the same group. This causes unnecessary multiplexing such as in the following example where l3_cache_access isn't shared: $ perf stat -M l3 -a sleep 1 Performance counter stats for 'system wide': 3,117,007 l3_cache_miss # 199.5 MB/s l3_rd_bw # 43.6 % l3_hits # 56.4 % l3_miss (50.00%) 5,526,447 l3_cache_access (50.00%) 5,392,435 l3_cache_access # 5389191.2 access/s l3_access_rate (50.00%) 1,000,601,901 ns duration_time 1.000601901 seconds time elapsed Fix this by placing duration_time in all groups unless metric sharing has been disabled on the command line: $ perf stat -M l3 -a sleep 1 Performance counter stats for 'system wide': 3,597,972 l3_cache_miss # 230.3 MB/s l3_rd_bw # 48.0 % l3_hits # 52.0 % l3_miss 6,914,459 l3_cache_access # 6909935.9 access/s l3_access_rate 1,000,654,579 ns duration_time 1.000654579 seconds time elapsed $ perf stat --metric-no-merge -M l3 -a sleep 1 Performance counter stats for 'system wide': 3,501,834 l3_cache_miss # 53.5 % l3_miss (24.99%) 6,548,173 l3_cache_access (24.99%) 3,417,622 l3_cache_miss # 45.7 % l3_hits (25.04%) 6,294,062 l3_cache_access (25.04%) 5,923,238 l3_cache_access # 5919688.1 access/s l3_access_rate (24.99%) 1,000,599,683 ns duration_time 3,607,486 l3_cache_miss # 230.9 MB/s l3_rd_bw (49.97%) 1.000599683 seconds time elapsed v2. Doesn't count duration_time in the metric_list_cmp function that sorts larger metrics first. Without this a metric with duration_time and an event is sorted the same as a metric with two events, possibly not allowing the first metric to share with the second. Signed-off-by: Ian Rogers Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: John Garry Cc: Kajol Jain Cc: Kan Liang Cc: Mark Rutland Cc: Namhyung Kim Cc: Paul Clarke Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lore.kernel.org/lkml/20211124015226.3317994-1-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/metricgroup.c | 42 +++++++++++++++++++++++++++-------- 1 file changed, 33 insertions(+), 9 deletions(-) diff --git a/tools/perf/util/metricgroup.c b/tools/perf/util/metricgroup.c index fffe02aae3ed..51c99cb08abf 100644 --- a/tools/perf/util/metricgroup.c +++ b/tools/perf/util/metricgroup.c @@ -1115,13 +1115,27 @@ out: return ret; } +/** + * metric_list_cmp - list_sort comparator that sorts metrics with more events to + * the front. duration_time is excluded from the count. + */ static int metric_list_cmp(void *priv __maybe_unused, const struct list_head *l, const struct list_head *r) { const struct metric *left = container_of(l, struct metric, nd); const struct metric *right = container_of(r, struct metric, nd); + struct expr_id_data *data; + int left_count, right_count; - return hashmap__size(right->pctx->ids) - hashmap__size(left->pctx->ids); + left_count = hashmap__size(left->pctx->ids); + if (!expr__get_id(left->pctx, "duration_time", &data)) + left_count--; + + right_count = hashmap__size(right->pctx->ids); + if (!expr__get_id(right->pctx, "duration_time", &data)) + right_count--; + + return right_count - left_count; } /** @@ -1299,14 +1313,16 @@ err_out: /** * parse_ids - Build the event string for the ids and parse them creating an * evlist. The encoded metric_ids are decoded. + * @metric_no_merge: is metric sharing explicitly disabled. * @fake_pmu: used when testing metrics not supported by the current CPU. * @ids: the event identifiers parsed from a metric. * @modifier: any modifiers added to the events. * @has_constraint: false if events should be placed in a weak group. * @out_evlist: the created list of events. */ -static int parse_ids(struct perf_pmu *fake_pmu, struct expr_parse_ctx *ids, - const char *modifier, bool has_constraint, struct evlist **out_evlist) +static int parse_ids(bool metric_no_merge, struct perf_pmu *fake_pmu, + struct expr_parse_ctx *ids, const char *modifier, + bool has_constraint, struct evlist **out_evlist) { struct parse_events_error parse_error; struct evlist *parsed_evlist; @@ -1314,12 +1330,19 @@ static int parse_ids(struct perf_pmu *fake_pmu, struct expr_parse_ctx *ids, int ret; *out_evlist = NULL; - if (hashmap__size(ids->ids) == 0) { + if (!metric_no_merge || hashmap__size(ids->ids) == 0) { char *tmp; /* - * No ids/events in the expression parsing context. Events may - * have been removed because of constant evaluation, e.g.: - * event1 if #smt_on else 0 + * We may fail to share events between metrics because + * duration_time isn't present in one metric. For example, a + * ratio of cache misses doesn't need duration_time but the same + * events may be used for a misses per second. Events without + * sharing implies multiplexing, that is best avoided, so place + * duration_time in every group. + * + * Also, there may be no ids/events in the expression parsing + * context because of constant evaluation, e.g.: + * event1 if #smt_on else 0 * Add a duration_time event to avoid a parse error on an empty * string. */ @@ -1387,7 +1410,8 @@ static int parse_groups(struct evlist *perf_evlist, const char *str, ret = build_combined_expr_ctx(&metric_list, &combined); if (!ret && combined && hashmap__size(combined->ids)) { - ret = parse_ids(fake_pmu, combined, /*modifier=*/NULL, + ret = parse_ids(metric_no_merge, fake_pmu, combined, + /*modifier=*/NULL, /*has_constraint=*/true, &combined_evlist); } @@ -1435,7 +1459,7 @@ static int parse_groups(struct evlist *perf_evlist, const char *str, } } if (!metric_evlist) { - ret = parse_ids(fake_pmu, m->pctx, m->modifier, + ret = parse_ids(metric_no_merge, fake_pmu, m->pctx, m->modifier, m->has_constraint, &m->evlist); if (ret) goto out; From ecdcf630d71f3b4c64097cad0add561cd5010c02 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Tue, 30 Nov 2021 09:49:44 -0800 Subject: [PATCH 041/493] perf evlist: Allow setting arbitrary leader The leader of a group is the first, but allow it to be an arbitrary list member so that for Intel topdown events slots may always be the group leader. Reviewed-by: Kajol Jain Signed-off-by: Ian Rogers Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: John Garry Cc: Kajol Jain Cc: Kan Liang Cc: Mark Rutland Cc: Namhyung Kim Cc: Paul Clarke Cc: Peter Zijlstra Cc: Riccardo Mancini Cc: Stephane Eranian Cc: Vineet Singh Link: http://lore.kernel.org/lkml/20211130174945.247604-1-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/lib/perf/evlist.c | 15 +++++++++------ tools/lib/perf/include/internal/evlist.h | 2 +- tools/perf/util/parse-events.c | 4 ++-- 3 files changed, 12 insertions(+), 9 deletions(-) diff --git a/tools/lib/perf/evlist.c b/tools/lib/perf/evlist.c index e37dfad31383..245acbc53bd3 100644 --- a/tools/lib/perf/evlist.c +++ b/tools/lib/perf/evlist.c @@ -643,14 +643,14 @@ perf_evlist__next_mmap(struct perf_evlist *evlist, struct perf_mmap *map, return overwrite ? evlist->mmap_ovw_first : evlist->mmap_first; } -void __perf_evlist__set_leader(struct list_head *list) +void __perf_evlist__set_leader(struct list_head *list, struct perf_evsel *leader) { - struct perf_evsel *evsel, *leader; + struct perf_evsel *first, *last, *evsel; - leader = list_entry(list->next, struct perf_evsel, node); - evsel = list_entry(list->prev, struct perf_evsel, node); + first = list_first_entry(list, struct perf_evsel, node); + last = list_last_entry(list, struct perf_evsel, node); - leader->nr_members = evsel->idx - leader->idx + 1; + leader->nr_members = last->idx - first->idx + 1; __perf_evlist__for_each_entry(list, evsel) evsel->leader = leader; @@ -659,7 +659,10 @@ void __perf_evlist__set_leader(struct list_head *list) void perf_evlist__set_leader(struct perf_evlist *evlist) { if (evlist->nr_entries) { + struct perf_evsel *first = list_entry(evlist->entries.next, + struct perf_evsel, node); + evlist->nr_groups = evlist->nr_entries > 1 ? 1 : 0; - __perf_evlist__set_leader(&evlist->entries); + __perf_evlist__set_leader(&evlist->entries, first); } } diff --git a/tools/lib/perf/include/internal/evlist.h b/tools/lib/perf/include/internal/evlist.h index f366dbad6a88..6f74269a3ad4 100644 --- a/tools/lib/perf/include/internal/evlist.h +++ b/tools/lib/perf/include/internal/evlist.h @@ -127,5 +127,5 @@ int perf_evlist__id_add_fd(struct perf_evlist *evlist, void perf_evlist__reset_id_hash(struct perf_evlist *evlist); -void __perf_evlist__set_leader(struct list_head *list); +void __perf_evlist__set_leader(struct list_head *list, struct perf_evsel *leader); #endif /* __LIBPERF_INTERNAL_EVLIST_H */ diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index ba74fdf74af9..1d68167ab611 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -1837,8 +1837,8 @@ void parse_events__set_leader(char *name, struct list_head *list, if (parse_events__set_leader_for_uncore_aliase(name, list, parse_state)) return; - __perf_evlist__set_leader(list); - leader = list_entry(list->next, struct evsel, core.node); + leader = list_first_entry(list, struct evsel, core.node); + __perf_evlist__set_leader(list, &leader->core); leader->group_name = name ? strdup(name) : NULL; } From 94dbfd6781a0e87b6faa6012810eb22e7d5b8a70 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Tue, 30 Nov 2021 09:49:45 -0800 Subject: [PATCH 042/493] perf parse-events: Architecture specific leader override Currently topdown events must appear after a slots event: $ perf stat -e '{slots,topdown-fe-bound}' /bin/true Performance counter stats for '/bin/true': 3,183,090 slots 986,133 topdown-fe-bound Reversing the events yields: $ perf stat -e '{topdown-fe-bound,slots}' /bin/true Error: The sys_perf_event_open() syscall returned with 22 (Invalid argument) for event (topdown-fe-bound). For metrics the order of events is determined by iterating over a hashmap, and so slots isn't guaranteed to be first which can yield this error. Change the set_leader in parse-events, called when a group is closed, so that rather than always making the first event the leader, if the slots event exists then it is made the leader. It is then moved to the head of the evlist otherwise it won't be opened in the correct order. The result is: $ perf stat -e '{topdown-fe-bound,slots}' /bin/true Performance counter stats for '/bin/true': 3,274,795 slots 1,001,702 topdown-fe-bound A problem with this approach is the slots event is identified by name, names can be overwritten like 'cpu/slots,name=foo/' and this causes the leader change to fail. The change also modifies and fixes mixed groups like, with the change: $ perf stat -e '{instructions,slots,topdown-fe-bound}' -a -- sleep 2 Performance counter stats for 'system wide': 5574985410 slots 971981616 instructions 1348461887 topdown-fe-bound 2.001263120 seconds time elapsed Without the change: $ perf stat -e '{instructions,slots,topdown-fe-bound}' -a -- sleep 2 Performance counter stats for 'system wide': instructions slots topdown-fe-bound 2.006247990 seconds time elapsed Something that may be undesirable here is that the events are reordered in the output. Reviewed-by: Kajol Jain Signed-off-by: Ian Rogers Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: John Garry Cc: Kajol Jain Cc: Kan Liang Cc: Mark Rutland Cc: Namhyung Kim Cc: Paul Clarke Cc: Peter Zijlstra Cc: Riccardo Mancini Cc: Stephane Eranian Cc: Vineet Singh Link: http://lore.kernel.org/lkml/20211130174945.247604-2-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/arch/x86/util/evlist.c | 17 +++++++++++++++++ tools/perf/util/evlist.h | 1 + tools/perf/util/parse-events.c | 8 +++++++- 3 files changed, 25 insertions(+), 1 deletion(-) diff --git a/tools/perf/arch/x86/util/evlist.c b/tools/perf/arch/x86/util/evlist.c index 0b0951030a2f..f924246eff78 100644 --- a/tools/perf/arch/x86/util/evlist.c +++ b/tools/perf/arch/x86/util/evlist.c @@ -17,3 +17,20 @@ int arch_evlist__add_default_attrs(struct evlist *evlist) else return parse_events(evlist, TOPDOWN_L1_EVENTS, NULL); } + +struct evsel *arch_evlist__leader(struct list_head *list) +{ + struct evsel *evsel, *first; + + first = list_first_entry(list, struct evsel, core.node); + + if (!pmu_have_event("cpu", "slots")) + return first; + + __evlist__for_each_entry(list, evsel) { + if (evsel->pmu_name && !strcmp(evsel->pmu_name, "cpu") && + evsel->name && strstr(evsel->name, "slots")) + return evsel; + } + return first; +} diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h index 97bfb8d0be4f..993437ffe429 100644 --- a/tools/perf/util/evlist.h +++ b/tools/perf/util/evlist.h @@ -110,6 +110,7 @@ int __evlist__add_default_attrs(struct evlist *evlist, __evlist__add_default_attrs(evlist, array, ARRAY_SIZE(array)) int arch_evlist__add_default_attrs(struct evlist *evlist); +struct evsel *arch_evlist__leader(struct list_head *list); int evlist__add_dummy(struct evlist *evlist); diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index 1d68167ab611..acf20ce98ce9 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -1824,6 +1824,11 @@ out: return ret; } +__weak struct evsel *arch_evlist__leader(struct list_head *list) +{ + return list_first_entry(list, struct evsel, core.node); +} + void parse_events__set_leader(char *name, struct list_head *list, struct parse_events_state *parse_state) { @@ -1837,9 +1842,10 @@ void parse_events__set_leader(char *name, struct list_head *list, if (parse_events__set_leader_for_uncore_aliase(name, list, parse_state)) return; - leader = list_first_entry(list, struct evsel, core.node); + leader = arch_evlist__leader(list); __perf_evlist__set_leader(list, &leader->core); leader->group_name = name ? strdup(name) : NULL; + list_move(&leader->core.node, list); } /* list_event is assumed to point to malloc'ed memory */ From ed17b1914978eddb2b01f2d34577f1c82518c650 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Fri, 3 Dec 2021 22:05:44 +0100 Subject: [PATCH 043/493] perf tools: Drop requirement for libstdc++.so for libopencsd check MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It's possible to link against libopencsd_c_api without having libstdc++.so available, only libstdc++.so.6.0.28 (or whatever version is in use) needs to be available. The same holds true for libopencsd.so. When -lstdc++ (or -lopencsd) is explicitly passed to the linker however the .so file must be available. So wrap adding the dependencies into a check for static linking that actually requires adding them all. The same construct is already used for some other tests in the same file to reduce dependencies in the dynamic linking case. Fixes: 573cf5c9a152 ("perf build: Add missing -lstdc++ when linking with libopencsd") Reviewed-by: James Clark Signed-off-by: Uwe Kleine-König Cc: Adrian Bunk Cc: Alexander Shishkin Cc: Branislav Rankov Cc: Diederik de Haas Cc: Jiri Olsa Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Link: https://lore.kernel.org/all/20211203210544.1137935-1-uwe@kleine-koenig.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Makefile.config | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config index 94bb53b0cebd..96ad944ca6a8 100644 --- a/tools/perf/Makefile.config +++ b/tools/perf/Makefile.config @@ -144,7 +144,10 @@ FEATURE_CHECK_LDFLAGS-libcrypto = -lcrypto ifdef CSINCLUDES LIBOPENCSD_CFLAGS := -I$(CSINCLUDES) endif -OPENCSDLIBS := -lopencsd_c_api -lopencsd -lstdc++ +OPENCSDLIBS := -lopencsd_c_api +ifeq ($(findstring -static,${LDFLAGS}),-static) + OPENCSDLIBS += -lopencsd -lstdc++ +endif ifdef CSLIBS LIBOPENCSD_LDFLAGS := -L$(CSLIBS) endif From e69dc84282fb474cb87097c6c945d8f90e05a4d9 Mon Sep 17 00:00:00 2001 From: Jin Yao Date: Thu, 9 Sep 2021 14:22:15 +0800 Subject: [PATCH 044/493] perf stat: Support --cputype option for hybrid events In previous patch, we have supported the syntax which enables the event on a specified pmu, such as: cpu_core// cpu_atom// While this syntax is not very easy for applying on a set of events or applying on a group. In following example, we have to explicitly assign the pmu prefix. # ./perf stat -e '{cpu_core/cycles/,cpu_core/instructions/}' -- sleep 1 Performance counter stats for 'sleep 1': 1,158,545 cpu_core/cycles/ 1,003,113 cpu_core/instructions/ 1.002428712 seconds time elapsed A much easier way is: # ./perf stat --cputype core -e '{cycles,instructions}' -- sleep 1 Performance counter stats for 'sleep 1': 1,101,071 cpu_core/cycles/ 939,892 cpu_core/instructions/ 1.002363142 seconds time elapsed For this example, the '--cputype' enables the events from specified pmu (cpu_core). If '--cputype' conflicts with pmu prefix, '--cputype' is ignored. # ./perf stat --cputype core -e cycles,cpu_atom/instructions/ -a -- sleep 1 Performance counter stats for 'system wide': 21,003,407 cpu_core/cycles/ 367,886 cpu_atom/instructions/ 1.002203520 seconds time elapsed Signed-off-by: Jin Yao Cc: Alexander Shishkin Cc: Andi Kleen Cc: Jin Yao Cc: Jiri Olsa Cc: Kan Liang Cc: Peter Zijlstra Link: http://lore.kernel.org/lkml/20210909062215.10278-1-yao.jin@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-stat.txt | 4 ++++ tools/perf/builtin-stat.c | 24 ++++++++++++++++++++++++ tools/perf/util/evlist.h | 1 + tools/perf/util/parse-events-hybrid.c | 9 ++++++--- 4 files changed, 35 insertions(+), 3 deletions(-) diff --git a/tools/perf/Documentation/perf-stat.txt b/tools/perf/Documentation/perf-stat.txt index 604e6f2301ea..c06c341e72b9 100644 --- a/tools/perf/Documentation/perf-stat.txt +++ b/tools/perf/Documentation/perf-stat.txt @@ -495,6 +495,10 @@ This option can be enabled in perf config by setting the variable $ perf config stat.no-csv-summary=true +--cputype:: +Only enable events on applying cpu with this type for hybrid platform +(e.g. core or atom)" + EXAMPLES -------- diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 7974933dbc77..f6ca2b054c5b 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -1168,6 +1168,26 @@ static int parse_stat_cgroups(const struct option *opt, return parse_cgroups(opt, str, unset); } +static int parse_hybrid_type(const struct option *opt, + const char *str, + int unset __maybe_unused) +{ + struct evlist *evlist = *(struct evlist **)opt->value; + + if (!list_empty(&evlist->core.entries)) { + fprintf(stderr, "Must define cputype before events/metrics\n"); + return -1; + } + + evlist->hybrid_pmu_name = perf_pmu__hybrid_type_to_pmu(str); + if (!evlist->hybrid_pmu_name) { + fprintf(stderr, "--cputype %s is not supported!\n", str); + return -1; + } + + return 0; +} + static struct option stat_options[] = { OPT_BOOLEAN('T', "transaction", &transaction_run, "hardware transaction statistics"), @@ -1282,6 +1302,10 @@ static struct option stat_options[] = { "don't print 'summary' for CSV summary output"), OPT_BOOLEAN(0, "quiet", &stat_config.quiet, "don't print output (useful with record)"), + OPT_CALLBACK(0, "cputype", &evsel_list, "hybrid cpu type", + "Only enable events on applying cpu with this type " + "for hybrid platform (e.g. core or atom)", + parse_hybrid_type), #ifdef HAVE_LIBPFM OPT_CALLBACK(0, "pfm-events", &evsel_list, "event", "libpfm4 event selector. use 'perf list' to list available events", diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h index 993437ffe429..27594900a052 100644 --- a/tools/perf/util/evlist.h +++ b/tools/perf/util/evlist.h @@ -64,6 +64,7 @@ struct evlist { struct evsel *selected; struct events_stats stats; struct perf_env *env; + const char *hybrid_pmu_name; void (*trace_event_sample_raw)(struct evlist *evlist, union perf_event *event, struct perf_sample *sample); diff --git a/tools/perf/util/parse-events-hybrid.c b/tools/perf/util/parse-events-hybrid.c index 9fc86971027b..284f8eabd3b9 100644 --- a/tools/perf/util/parse-events-hybrid.c +++ b/tools/perf/util/parse-events-hybrid.c @@ -63,10 +63,13 @@ static int create_event_hybrid(__u32 config_type, int *idx, static int pmu_cmp(struct parse_events_state *parse_state, struct perf_pmu *pmu) { - if (!parse_state->hybrid_pmu_name) - return 0; + if (parse_state->evlist && parse_state->evlist->hybrid_pmu_name) + return strcmp(parse_state->evlist->hybrid_pmu_name, pmu->name); - return strcmp(parse_state->hybrid_pmu_name, pmu->name); + if (parse_state->hybrid_pmu_name) + return strcmp(parse_state->hybrid_pmu_name, pmu->name); + + return 0; } static int add_hw_hybrid(struct parse_events_state *parse_state, From 8ff4f20f3eb55dea0dbbe5e32043ab6b7427882f Mon Sep 17 00:00:00 2001 From: Andrew Kilroy Date: Fri, 3 Dec 2021 12:35:22 +0000 Subject: [PATCH 045/493] perf vendor events arm64: Fix JSON indentation to 4 spaces standard Correct indentation to 4 spaces, same as the other JSON files. Reviewed-by: John Garry Signed-off-by: Andrew Kilroy Cc: Alexander Shishkin Cc: Jiri Olsa Cc: Leo Yan Cc: Mark Rutland Cc: Mathieu Poirier Cc: Namhyung Kim Cc: Will Deacon Cc: linux-arm-kernel@lists.infradead.org Link: http://lore.kernel.org/lkml/20211203123525.31127-2-andrew.kilroy@arm.com Signed-off-by: Arnaldo Carvalho de Melo --- .../arch/arm64/armv8-recommended.json | 202 +++++++++--------- 1 file changed, 101 insertions(+), 101 deletions(-) diff --git a/tools/perf/pmu-events/arch/arm64/armv8-recommended.json b/tools/perf/pmu-events/arch/arm64/armv8-recommended.json index d0a19866563d..210afa856091 100644 --- a/tools/perf/pmu-events/arch/arm64/armv8-recommended.json +++ b/tools/perf/pmu-events/arch/arm64/armv8-recommended.json @@ -148,305 +148,305 @@ "EventCode": "0x60", "EventName": "BUS_ACCESS_RD", "BriefDescription": "Bus access read" - }, - { + }, + { "PublicDescription": "Bus access write", "EventCode": "0x61", "EventName": "BUS_ACCESS_WR", "BriefDescription": "Bus access write" - }, - { + }, + { "PublicDescription": "Bus access, Normal, Cacheable, Shareable", "EventCode": "0x62", "EventName": "BUS_ACCESS_SHARED", "BriefDescription": "Bus access, Normal, Cacheable, Shareable" - }, - { + }, + { "PublicDescription": "Bus access, not Normal, Cacheable, Shareable", "EventCode": "0x63", "EventName": "BUS_ACCESS_NOT_SHARED", "BriefDescription": "Bus access, not Normal, Cacheable, Shareable" - }, - { + }, + { "PublicDescription": "Bus access, Normal", "EventCode": "0x64", "EventName": "BUS_ACCESS_NORMAL", "BriefDescription": "Bus access, Normal" - }, - { + }, + { "PublicDescription": "Bus access, peripheral", "EventCode": "0x65", "EventName": "BUS_ACCESS_PERIPH", "BriefDescription": "Bus access, peripheral" - }, - { + }, + { "PublicDescription": "Data memory access, read", "EventCode": "0x66", "EventName": "MEM_ACCESS_RD", "BriefDescription": "Data memory access, read" - }, - { + }, + { "PublicDescription": "Data memory access, write", "EventCode": "0x67", "EventName": "MEM_ACCESS_WR", "BriefDescription": "Data memory access, write" - }, - { + }, + { "PublicDescription": "Unaligned access, read", "EventCode": "0x68", "EventName": "UNALIGNED_LD_SPEC", "BriefDescription": "Unaligned access, read" - }, - { + }, + { "PublicDescription": "Unaligned access, write", "EventCode": "0x69", "EventName": "UNALIGNED_ST_SPEC", "BriefDescription": "Unaligned access, write" - }, - { + }, + { "PublicDescription": "Unaligned access", "EventCode": "0x6a", "EventName": "UNALIGNED_LDST_SPEC", "BriefDescription": "Unaligned access" - }, - { + }, + { "PublicDescription": "Exclusive operation speculatively executed, LDREX or LDX", "EventCode": "0x6c", "EventName": "LDREX_SPEC", "BriefDescription": "Exclusive operation speculatively executed, LDREX or LDX" - }, - { + }, + { "PublicDescription": "Exclusive operation speculatively executed, STREX or STX pass", "EventCode": "0x6d", "EventName": "STREX_PASS_SPEC", "BriefDescription": "Exclusive operation speculatively executed, STREX or STX pass" - }, - { + }, + { "PublicDescription": "Exclusive operation speculatively executed, STREX or STX fail", "EventCode": "0x6e", "EventName": "STREX_FAIL_SPEC", "BriefDescription": "Exclusive operation speculatively executed, STREX or STX fail" - }, - { + }, + { "PublicDescription": "Exclusive operation speculatively executed, STREX or STX", "EventCode": "0x6f", "EventName": "STREX_SPEC", "BriefDescription": "Exclusive operation speculatively executed, STREX or STX" - }, - { + }, + { "PublicDescription": "Operation speculatively executed, load", "EventCode": "0x70", "EventName": "LD_SPEC", "BriefDescription": "Operation speculatively executed, load" - }, - { + }, + { "PublicDescription": "Operation speculatively executed, store", "EventCode": "0x71", "EventName": "ST_SPEC", "BriefDescription": "Operation speculatively executed, store" - }, - { + }, + { "PublicDescription": "Operation speculatively executed, load or store", "EventCode": "0x72", "EventName": "LDST_SPEC", "BriefDescription": "Operation speculatively executed, load or store" - }, - { + }, + { "PublicDescription": "Operation speculatively executed, integer data processing", "EventCode": "0x73", "EventName": "DP_SPEC", "BriefDescription": "Operation speculatively executed, integer data processing" - }, - { + }, + { "PublicDescription": "Operation speculatively executed, Advanced SIMD instruction", "EventCode": "0x74", "EventName": "ASE_SPEC", "BriefDescription": "Operation speculatively executed, Advanced SIMD instruction" - }, - { + }, + { "PublicDescription": "Operation speculatively executed, floating-point instruction", "EventCode": "0x75", "EventName": "VFP_SPEC", "BriefDescription": "Operation speculatively executed, floating-point instruction" - }, - { + }, + { "PublicDescription": "Operation speculatively executed, software change of the PC", "EventCode": "0x76", "EventName": "PC_WRITE_SPEC", "BriefDescription": "Operation speculatively executed, software change of the PC" - }, - { + }, + { "PublicDescription": "Operation speculatively executed, Cryptographic instruction", "EventCode": "0x77", "EventName": "CRYPTO_SPEC", "BriefDescription": "Operation speculatively executed, Cryptographic instruction" - }, - { + }, + { "PublicDescription": "Branch speculatively executed, immediate branch", "EventCode": "0x78", "EventName": "BR_IMMED_SPEC", "BriefDescription": "Branch speculatively executed, immediate branch" - }, - { + }, + { "PublicDescription": "Branch speculatively executed, procedure return", "EventCode": "0x79", "EventName": "BR_RETURN_SPEC", "BriefDescription": "Branch speculatively executed, procedure return" - }, - { + }, + { "PublicDescription": "Branch speculatively executed, indirect branch", "EventCode": "0x7a", "EventName": "BR_INDIRECT_SPEC", "BriefDescription": "Branch speculatively executed, indirect branch" - }, - { + }, + { "PublicDescription": "Barrier speculatively executed, ISB", "EventCode": "0x7c", "EventName": "ISB_SPEC", "BriefDescription": "Barrier speculatively executed, ISB" - }, - { + }, + { "PublicDescription": "Barrier speculatively executed, DSB", "EventCode": "0x7d", "EventName": "DSB_SPEC", "BriefDescription": "Barrier speculatively executed, DSB" - }, - { + }, + { "PublicDescription": "Barrier speculatively executed, DMB", "EventCode": "0x7e", "EventName": "DMB_SPEC", "BriefDescription": "Barrier speculatively executed, DMB" - }, - { + }, + { "PublicDescription": "Exception taken, Other synchronous", "EventCode": "0x81", "EventName": "EXC_UNDEF", "BriefDescription": "Exception taken, Other synchronous" - }, - { + }, + { "PublicDescription": "Exception taken, Supervisor Call", "EventCode": "0x82", "EventName": "EXC_SVC", "BriefDescription": "Exception taken, Supervisor Call" - }, - { + }, + { "PublicDescription": "Exception taken, Instruction Abort", "EventCode": "0x83", "EventName": "EXC_PABORT", "BriefDescription": "Exception taken, Instruction Abort" - }, - { + }, + { "PublicDescription": "Exception taken, Data Abort and SError", "EventCode": "0x84", "EventName": "EXC_DABORT", "BriefDescription": "Exception taken, Data Abort and SError" - }, - { + }, + { "PublicDescription": "Exception taken, IRQ", "EventCode": "0x86", "EventName": "EXC_IRQ", "BriefDescription": "Exception taken, IRQ" - }, - { + }, + { "PublicDescription": "Exception taken, FIQ", "EventCode": "0x87", "EventName": "EXC_FIQ", "BriefDescription": "Exception taken, FIQ" - }, - { + }, + { "PublicDescription": "Exception taken, Secure Monitor Call", "EventCode": "0x88", "EventName": "EXC_SMC", "BriefDescription": "Exception taken, Secure Monitor Call" - }, - { + }, + { "PublicDescription": "Exception taken, Hypervisor Call", "EventCode": "0x8a", "EventName": "EXC_HVC", "BriefDescription": "Exception taken, Hypervisor Call" - }, - { + }, + { "PublicDescription": "Exception taken, Instruction Abort not taken locally", "EventCode": "0x8b", "EventName": "EXC_TRAP_PABORT", "BriefDescription": "Exception taken, Instruction Abort not taken locally" - }, - { + }, + { "PublicDescription": "Exception taken, Data Abort or SError not taken locally", "EventCode": "0x8c", "EventName": "EXC_TRAP_DABORT", "BriefDescription": "Exception taken, Data Abort or SError not taken locally" - }, - { + }, + { "PublicDescription": "Exception taken, Other traps not taken locally", "EventCode": "0x8d", "EventName": "EXC_TRAP_OTHER", "BriefDescription": "Exception taken, Other traps not taken locally" - }, - { + }, + { "PublicDescription": "Exception taken, IRQ not taken locally", "EventCode": "0x8e", "EventName": "EXC_TRAP_IRQ", "BriefDescription": "Exception taken, IRQ not taken locally" - }, - { + }, + { "PublicDescription": "Exception taken, FIQ not taken locally", "EventCode": "0x8f", "EventName": "EXC_TRAP_FIQ", "BriefDescription": "Exception taken, FIQ not taken locally" - }, - { + }, + { "PublicDescription": "Release consistency operation speculatively executed, Load-Acquire", "EventCode": "0x90", "EventName": "RC_LD_SPEC", "BriefDescription": "Release consistency operation speculatively executed, Load-Acquire" - }, - { + }, + { "PublicDescription": "Release consistency operation speculatively executed, Store-Release", "EventCode": "0x91", "EventName": "RC_ST_SPEC", "BriefDescription": "Release consistency operation speculatively executed, Store-Release" - }, - { + }, + { "PublicDescription": "Attributable Level 3 data or unified cache access, read", "EventCode": "0xa0", "EventName": "L3D_CACHE_RD", "BriefDescription": "Attributable Level 3 data or unified cache access, read" - }, - { + }, + { "PublicDescription": "Attributable Level 3 data or unified cache access, write", "EventCode": "0xa1", "EventName": "L3D_CACHE_WR", "BriefDescription": "Attributable Level 3 data or unified cache access, write" - }, - { + }, + { "PublicDescription": "Attributable Level 3 data or unified cache refill, read", "EventCode": "0xa2", "EventName": "L3D_CACHE_REFILL_RD", "BriefDescription": "Attributable Level 3 data or unified cache refill, read" - }, - { + }, + { "PublicDescription": "Attributable Level 3 data or unified cache refill, write", "EventCode": "0xa3", "EventName": "L3D_CACHE_REFILL_WR", "BriefDescription": "Attributable Level 3 data or unified cache refill, write" - }, - { + }, + { "PublicDescription": "Attributable Level 3 data or unified cache Write-Back, victim", "EventCode": "0xa6", "EventName": "L3D_CACHE_WB_VICTIM", "BriefDescription": "Attributable Level 3 data or unified cache Write-Back, victim" - }, - { + }, + { "PublicDescription": "Attributable Level 3 data or unified cache Write-Back, cache clean", "EventCode": "0xa7", "EventName": "L3D_CACHE_WB_CLEAN", "BriefDescription": "Attributable Level 3 data or unified cache Write-Back, cache clean" - }, - { + }, + { "PublicDescription": "Attributable Level 3 data or unified cache access, invalidate", "EventCode": "0xa8", "EventName": "L3D_CACHE_INVAL", "BriefDescription": "Attributable Level 3 data or unified cache access, invalidate" - } + } ] From 62c46d55688894c517d26b88ddb37a8dfa8a155a Mon Sep 17 00:00:00 2001 From: Mathieu Poirier Date: Thu, 2 Dec 2021 10:11:25 -0700 Subject: [PATCH 046/493] MAINTAINERS: Removing Ohad from remoteproc/rpmsg maintenance Ohad has not reviewed patches in the remoteproc and rpmsg subsystems for several years now: $ git log --no-merges --format=email drivers/remoteproc/ drivers/rpmsg/ | \ grep -Pi "^Subject:|^Date:|^[\w\-]+-by:.*ohad*" | grep -B2 ohad Date: Wed, 16 Sep 2015 07:32:54 -0500 Subject: [PATCH] remoteproc/wkup_m3: Use MODULE_DEVICE_TABLE to export alias Signed-off-by: Ohad Ben-Cohen Date: Fri, 28 Aug 2015 18:08:19 -0700 Subject: [PATCH] remoteproc: report error if resource table doesn't exist Signed-off-by: Ohad Ben-Cohen -- Date: Wed, 16 Sep 2015 19:29:18 -0500 Subject: [PATCH] remoteproc: fix memory leak of remoteproc ida cache layers Signed-off-by: Ohad Ben-Cohen Date: Fri, 20 Nov 2015 18:26:07 +0100 Subject: [PATCH] remoteproc: avoid stack overflow in debugfs file Signed-off-by: Ohad Ben-Cohen Date: Thu, 18 Jun 2015 11:44:41 +0300 Subject: [PATCH] remoteproc: fix !CONFIG_OF build breakage Signed-off-by: Ohad Ben-Cohen Date: Fri, 22 May 2015 15:45:30 -0500 Subject: [PATCH] remoteproc/wkup_m3: add a remoteproc driver for TI Wakeup M3 Signed-off-by: Ohad Ben-Cohen As such move his names to the CREDITS file. Signed-off-by: Mathieu Poirier Link: https://lore.kernel.org/r/20211202171125.903608-1-mathieu.poirier@linaro.org Acked-by: Ohad Ben Cohen --- CREDITS | 5 +++++ MAINTAINERS | 2 -- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/CREDITS b/CREDITS index d8f63e8329e8..b97256d5bc24 100644 --- a/CREDITS +++ b/CREDITS @@ -315,6 +315,11 @@ S: Via Delle Palme, 9 S: Terni 05100 S: Italy +N: Ohad Ben Cohen +E: ohad@wizery.com +D: Remote Processor (remoteproc) subsystem +D: Remote Processor Messaging (rpmsg) subsystem + N: Krzysztof Benedyczak E: golbi@mat.uni.torun.pl W: http://www.mat.uni.torun.pl/~golbi diff --git a/MAINTAINERS b/MAINTAINERS index 7a2345ce8521..ee82a598b3ba 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -16171,7 +16171,6 @@ S: Supported F: fs/reiserfs/ REMOTE PROCESSOR (REMOTEPROC) SUBSYSTEM -M: Ohad Ben-Cohen M: Bjorn Andersson M: Mathieu Poirier L: linux-remoteproc@vger.kernel.org @@ -16185,7 +16184,6 @@ F: include/linux/remoteproc.h F: include/linux/remoteproc/ REMOTE PROCESSOR MESSAGING (RPMSG) SUBSYSTEM -M: Ohad Ben-Cohen M: Bjorn Andersson M: Mathieu Poirier L: linux-remoteproc@vger.kernel.org From 8f86e69536f32a645e45a00d3b2ea73af5e41180 Mon Sep 17 00:00:00 2001 From: Arnaud Pouliquen Date: Mon, 6 Dec 2021 20:18:58 +0100 Subject: [PATCH 047/493] remoteproc: Fix remaining wrong return formatting in documentation kernel documentation specification: "The return value, if any, should be described in a dedicated section named Return." Signed-off-by: Arnaud Pouliquen Reviewed-by: Bjorn Andersson Link: https://lore.kernel.org/r/20211206191858.10741-1-arnaud.pouliquen@foss.st.com Signed-off-by: Mathieu Poirier --- drivers/remoteproc/mtk_scp_ipi.c | 4 ++-- drivers/remoteproc/st_slim_rproc.c | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/remoteproc/mtk_scp_ipi.c b/drivers/remoteproc/mtk_scp_ipi.c index 6dc955ecab80..00f041ebcde6 100644 --- a/drivers/remoteproc/mtk_scp_ipi.c +++ b/drivers/remoteproc/mtk_scp_ipi.c @@ -23,7 +23,7 @@ * * Register an ipi function to receive ipi interrupt from SCP. * - * Returns 0 if ipi registers successfully, -error on error. + * Return: 0 if ipi registers successfully, -error on error. */ int scp_ipi_register(struct mtk_scp *scp, u32 id, @@ -150,7 +150,7 @@ EXPORT_SYMBOL_GPL(scp_ipi_unlock); * When the processing completes, IPI handler registered * by scp_ipi_register will be called in interrupt context. * - * Returns 0 if sending data successfully, -error on error. + * Return: 0 if sending data successfully, -error on error. **/ int scp_ipi_send(struct mtk_scp *scp, u32 id, void *buf, unsigned int len, unsigned int wait) diff --git a/drivers/remoteproc/st_slim_rproc.c b/drivers/remoteproc/st_slim_rproc.c index 22096adc1ad3..4ed9467897e5 100644 --- a/drivers/remoteproc/st_slim_rproc.c +++ b/drivers/remoteproc/st_slim_rproc.c @@ -216,7 +216,7 @@ static const struct rproc_ops slim_rproc_ops = { * obtains and enables any clocks required by the SLIM core and also * ioremaps the various IO. * - * Returns st_slim_rproc pointer or PTR_ERR() on error. + * Return: st_slim_rproc pointer or PTR_ERR() on error. */ struct st_slim_rproc *st_slim_rproc_alloc(struct platform_device *pdev, From 8066c615cb69b7da8a94f59379847b037b3a5e46 Mon Sep 17 00:00:00 2001 From: Arnaud Pouliquen Date: Mon, 6 Dec 2021 20:07:58 +0100 Subject: [PATCH 048/493] rpmsg: core: Clean up resources on announce_create failure. During the rpmsg_dev_probe, if rpdev->ops->announce_create returns an error, the rpmsg device and default endpoint should be freed before exiting the function. Fixes: 5e619b48677c ("rpmsg: Split rpmsg core and virtio backend") Suggested-by: Bjorn Andersson Signed-off-by: Arnaud Pouliquen Reviewed-by: Bjorn Andersson Cc: stable Link: https://lore.kernel.org/r/20211206190758.10004-1-arnaud.pouliquen@foss.st.com Signed-off-by: Mathieu Poirier --- drivers/rpmsg/rpmsg_core.c | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) diff --git a/drivers/rpmsg/rpmsg_core.c b/drivers/rpmsg/rpmsg_core.c index f031b2b1b21c..d9e612f4f0f2 100644 --- a/drivers/rpmsg/rpmsg_core.c +++ b/drivers/rpmsg/rpmsg_core.c @@ -540,13 +540,25 @@ static int rpmsg_dev_probe(struct device *dev) err = rpdrv->probe(rpdev); if (err) { dev_err(dev, "%s: failed: %d\n", __func__, err); - if (ept) - rpmsg_destroy_ept(ept); - goto out; + goto destroy_ept; } - if (ept && rpdev->ops->announce_create) + if (ept && rpdev->ops->announce_create) { err = rpdev->ops->announce_create(rpdev); + if (err) { + dev_err(dev, "failed to announce creation\n"); + goto remove_rpdev; + } + } + + return 0; + +remove_rpdev: + if (rpdrv->remove) + rpdrv->remove(rpdev); +destroy_ept: + if (ept) + rpmsg_destroy_ept(ept); out: return err; } From 5ede7f0cfb93f0f8edf2245671e18e982a247f55 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Wed, 8 Dec 2021 09:33:35 -0800 Subject: [PATCH 049/493] Input: goodix - add pen support Some Goodix touchscreens have support for a (Goodix) active pen, add support for this. The info on how to detect when a pen is down and to detect when the stylus buttons are pressed was lifted from the out of tree Goodix driver with pen support written by Adya: https://gitlab.com/AdyaAdya/goodix-touchscreen-linux-driver/ Since there is no way to tell if pen support is present, the registering of the pen input_dev is delayed till the first pen event is detected. This has been tested on a Trekstor Surftab duo W1, a Chuwi Hi13 and a Cyberbook T116 tablet. BugLink: https://bugzilla.kernel.org/show_bug.cgi?id=202161 BugLink: https://bugzilla.kernel.org/show_bug.cgi?id=204513 Signed-off-by: Hans de Goede Link: https://lore.kernel.org/r/20211207100754.31155-3-hdegoede@redhat.com Signed-off-by: Dmitry Torokhov --- drivers/input/touchscreen/goodix.c | 122 ++++++++++++++++++++++++++++- drivers/input/touchscreen/goodix.h | 1 + 2 files changed, 121 insertions(+), 2 deletions(-) diff --git a/drivers/input/touchscreen/goodix.c b/drivers/input/touchscreen/goodix.c index b5cc91788195..48362bed7335 100644 --- a/drivers/input/touchscreen/goodix.c +++ b/drivers/input/touchscreen/goodix.c @@ -296,6 +296,107 @@ static int goodix_ts_read_input_report(struct goodix_ts_data *ts, u8 *data) return -ENOMSG; } +static struct input_dev *goodix_create_pen_input(struct goodix_ts_data *ts) +{ + struct device *dev = &ts->client->dev; + struct input_dev *input; + + input = devm_input_allocate_device(dev); + if (!input) + return NULL; + + input_alloc_absinfo(input); + if (!input->absinfo) { + input_free_device(input); + return NULL; + } + + input->absinfo[ABS_X] = ts->input_dev->absinfo[ABS_MT_POSITION_X]; + input->absinfo[ABS_Y] = ts->input_dev->absinfo[ABS_MT_POSITION_Y]; + __set_bit(ABS_X, input->absbit); + __set_bit(ABS_Y, input->absbit); + input_set_abs_params(input, ABS_PRESSURE, 0, 255, 0, 0); + + input_set_capability(input, EV_KEY, BTN_TOUCH); + input_set_capability(input, EV_KEY, BTN_TOOL_PEN); + input_set_capability(input, EV_KEY, BTN_STYLUS); + input_set_capability(input, EV_KEY, BTN_STYLUS2); + __set_bit(INPUT_PROP_DIRECT, input->propbit); + /* + * The resolution of these touchscreens is about 10 units/mm, the actual + * resolution does not matter much since we set INPUT_PROP_DIRECT. + * Userspace wants something here though, so just set it to 10 units/mm. + */ + input_abs_set_res(input, ABS_X, 10); + input_abs_set_res(input, ABS_Y, 10); + + input->name = "Goodix Active Pen"; + input->phys = "input/pen"; + input->id.bustype = BUS_I2C; + if (kstrtou16(ts->id, 10, &input->id.product)) + input->id.product = 0x1001; + input->id.version = ts->version; + + if (input_register_device(input) != 0) { + input_free_device(input); + return NULL; + } + + return input; +} + +static void goodix_ts_report_pen_down(struct goodix_ts_data *ts, u8 *data) +{ + int input_x, input_y, input_w; + u8 key_value; + + if (!ts->input_pen) { + ts->input_pen = goodix_create_pen_input(ts); + if (!ts->input_pen) + return; + } + + if (ts->contact_size == 9) { + input_x = get_unaligned_le16(&data[4]); + input_y = get_unaligned_le16(&data[6]); + input_w = get_unaligned_le16(&data[8]); + } else { + input_x = get_unaligned_le16(&data[2]); + input_y = get_unaligned_le16(&data[4]); + input_w = get_unaligned_le16(&data[6]); + } + + touchscreen_report_pos(ts->input_pen, &ts->prop, input_x, input_y, false); + input_report_abs(ts->input_pen, ABS_PRESSURE, input_w); + + input_report_key(ts->input_pen, BTN_TOUCH, 1); + input_report_key(ts->input_pen, BTN_TOOL_PEN, 1); + + if (data[0] & GOODIX_HAVE_KEY) { + key_value = data[1 + ts->contact_size]; + input_report_key(ts->input_pen, BTN_STYLUS, key_value & 0x10); + input_report_key(ts->input_pen, BTN_STYLUS2, key_value & 0x20); + } else { + input_report_key(ts->input_pen, BTN_STYLUS, 0); + input_report_key(ts->input_pen, BTN_STYLUS2, 0); + } + + input_sync(ts->input_pen); +} + +static void goodix_ts_report_pen_up(struct goodix_ts_data *ts) +{ + if (!ts->input_pen) + return; + + input_report_key(ts->input_pen, BTN_TOUCH, 0); + input_report_key(ts->input_pen, BTN_TOOL_PEN, 0); + input_report_key(ts->input_pen, BTN_STYLUS, 0); + input_report_key(ts->input_pen, BTN_STYLUS2, 0); + + input_sync(ts->input_pen); +} + static void goodix_ts_report_touch_8b(struct goodix_ts_data *ts, u8 *coor_data) { int id = coor_data[0] & 0x0F; @@ -326,6 +427,14 @@ static void goodix_ts_report_touch_9b(struct goodix_ts_data *ts, u8 *coor_data) input_report_abs(ts->input_dev, ABS_MT_WIDTH_MAJOR, input_w); } +static void goodix_ts_release_keys(struct goodix_ts_data *ts) +{ + int i; + + for (i = 0; i < GOODIX_MAX_KEYS; i++) + input_report_key(ts->input_dev, ts->keymap[i], 0); +} + static void goodix_ts_report_key(struct goodix_ts_data *ts, u8 *data) { int touch_num; @@ -340,8 +449,7 @@ static void goodix_ts_report_key(struct goodix_ts_data *ts, u8 *data) input_report_key(ts->input_dev, ts->keymap[i], 1); } else { - for (i = 0; i < GOODIX_MAX_KEYS; i++) - input_report_key(ts->input_dev, ts->keymap[i], 0); + goodix_ts_release_keys(ts); } } @@ -363,6 +471,15 @@ static void goodix_process_events(struct goodix_ts_data *ts) if (touch_num < 0) return; + /* The pen being down is always reported as a single touch */ + if (touch_num == 1 && (point_data[1] & 0x80)) { + goodix_ts_report_pen_down(ts, point_data); + goodix_ts_release_keys(ts); + goto sync; /* Release any previousle registered touches */ + } else { + goodix_ts_report_pen_up(ts); + } + goodix_ts_report_key(ts, point_data); for (i = 0; i < touch_num; i++) @@ -373,6 +490,7 @@ static void goodix_process_events(struct goodix_ts_data *ts) goodix_ts_report_touch_8b(ts, &point_data[1 + ts->contact_size * i]); +sync: input_mt_sync_frame(ts->input_dev); input_sync(ts->input_dev); } diff --git a/drivers/input/touchscreen/goodix.h b/drivers/input/touchscreen/goodix.h index 62138f930d1a..f79eaeaceedb 100644 --- a/drivers/input/touchscreen/goodix.h +++ b/drivers/input/touchscreen/goodix.h @@ -76,6 +76,7 @@ struct goodix_chip_data { struct goodix_ts_data { struct i2c_client *client; struct input_dev *input_dev; + struct input_dev *input_pen; const struct goodix_chip_data *chip; const char *firmware_name; struct touchscreen_properties prop; From f96b2e77f6d1b0c5181e0eae3dc60a00a5cd692a Mon Sep 17 00:00:00 2001 From: Len Baker Date: Sun, 5 Sep 2021 16:40:54 +0200 Subject: [PATCH 050/493] i3c/master/mipi-i3c-hci: Prefer struct_size over open coded arithmetic As noted in the "Deprecated Interfaces, Language Features, Attributes, and Conventions" documentation [1], size calculations (especially multiplication) should not be performed in memory allocator (or similar) function arguments due to the risk of them overflowing. This could lead to values wrapping around and a smaller allocation being made than the caller was expecting. Using those allocations could lead to linear overflows of heap memory and other misbehaviors. So, use the struct_size() helper to do the arithmetic instead of the argument "size + count * size" in the kzalloc() function. [1] https://www.kernel.org/doc/html/v5.14/process/deprecated.html#open-coded-arithmetic-in-allocator-arguments Signed-off-by: Len Baker Acked-by: Nicolas Pitre Signed-off-by: Alexandre Belloni Link: https://lore.kernel.org/r/20210905144054.5124-1-len.baker@gmx.com --- drivers/i3c/master/mipi-i3c-hci/dma.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/i3c/master/mipi-i3c-hci/dma.c b/drivers/i3c/master/mipi-i3c-hci/dma.c index af873a9be050..2990ac9eaade 100644 --- a/drivers/i3c/master/mipi-i3c-hci/dma.c +++ b/drivers/i3c/master/mipi-i3c-hci/dma.c @@ -223,7 +223,7 @@ static int hci_dma_init(struct i3c_hci *hci) } if (nr_rings > XFER_RINGS) nr_rings = XFER_RINGS; - rings = kzalloc(sizeof(*rings) + nr_rings * sizeof(*rh), GFP_KERNEL); + rings = kzalloc(struct_size(rings, headers, nr_rings), GFP_KERNEL); if (!rings) return -ENOMEM; hci->io_data = rings; From 313ece22600bcda85aa654af42628385abff215f Mon Sep 17 00:00:00 2001 From: Len Baker Date: Sun, 12 Sep 2021 17:51:35 +0200 Subject: [PATCH 051/493] i3c/master/mipi-i3c-hci: Prefer kcalloc over open coded arithmetic As noted in the "Deprecated Interfaces, Language Features, Attributes, and Conventions" documentation [1], size calculations (especially multiplication) should not be performed in memory allocator (or similar) function arguments due to the risk of them overflowing. This could lead to values wrapping around and a smaller allocation being made than the caller was expecting. Using those allocations could lead to linear overflows of heap memory and other misbehaviors. So, use the purpose specific kcalloc() function instead of the argument size * count in the kzalloc() function. [1] https://www.kernel.org/doc/html/v5.14/process/deprecated.html#open-coded-arithmetic-in-allocator-arguments Signed-off-by: Len Baker Acked-by: Nicolas Pitre Signed-off-by: Alexandre Belloni Link: https://lore.kernel.org/r/20210912155135.7541-1-len.baker@gmx.com --- drivers/i3c/master/mipi-i3c-hci/hci.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/i3c/master/mipi-i3c-hci/hci.h b/drivers/i3c/master/mipi-i3c-hci/hci.h index 80beb1d5be8f..f109923f6c3f 100644 --- a/drivers/i3c/master/mipi-i3c-hci/hci.h +++ b/drivers/i3c/master/mipi-i3c-hci/hci.h @@ -98,7 +98,7 @@ struct hci_xfer { static inline struct hci_xfer *hci_alloc_xfer(unsigned int n) { - return kzalloc(sizeof(struct hci_xfer) * n, GFP_KERNEL); + return kcalloc(n, sizeof(struct hci_xfer), GFP_KERNEL); } static inline void hci_free_xfer(struct hci_xfer *xfer, unsigned int n) From f18f98110f2b179792cb70d85cba697320a3790f Mon Sep 17 00:00:00 2001 From: Jamie Iles Date: Wed, 22 Sep 2021 17:56:00 +0100 Subject: [PATCH 052/493] i3c: fix incorrect address slot lookup on 64-bit The address slot bitmap is an array of unsigned long's which are the same size as an int on 32-bit platforms but not 64-bit. Loading the bitmap into an int could result in the incorrect status being returned for a slot and slots being reported as the wrong status. Fixes: 3a379bbcea0a ("i3c: Add core I3C infrastructure") Cc: Boris Brezillon Cc: Alexandre Belloni Signed-off-by: Jamie Iles Signed-off-by: Alexandre Belloni Link: https://lore.kernel.org/r/20210922165600.179394-1-quic_jiles@quicinc.com --- drivers/i3c/master.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/i3c/master.c b/drivers/i3c/master.c index c3b4c677b442..dfe18dcd008d 100644 --- a/drivers/i3c/master.c +++ b/drivers/i3c/master.c @@ -343,7 +343,8 @@ struct bus_type i3c_bus_type = { static enum i3c_addr_slot_status i3c_bus_get_addr_slot_status(struct i3c_bus *bus, u16 addr) { - int status, bitpos = addr * 2; + unsigned long status; + int bitpos = addr * 2; if (addr > I2C_MAX_ADDR) return I3C_ADDR_SLOT_RSVD; From 3f43926f271287fb1744c9ac9ae1122497f2b0c2 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Wed, 17 Nov 2021 23:05:23 +0100 Subject: [PATCH 053/493] i3c/master/mipi-i3c-hci: Fix a potentially infinite loop in 'hci_dat_v1_get_index()' The code in 'hci_dat_v1_get_index()' really looks like a hand coded version of 'for_each_set_bit()', except that a +1 is missing when searching for the next set bit. This really looks odd and it seems that it will loop until 'dat_w0_read()' returns the expected result. So use 'for_each_set_bit()' instead. It is less verbose and should be more correct. Fixes: 9ad9a52cce28 ("i3c/master: introduce the mipi-i3c-hci driver") Signed-off-by: Christophe JAILLET Acked-by: Nicolas Pitre Signed-off-by: Alexandre Belloni Link: https://lore.kernel.org/r/0cdf3cb10293ead1acd271fdb8a70369c298c082.1637186628.git.christophe.jaillet@wanadoo.fr --- drivers/i3c/master/mipi-i3c-hci/dat_v1.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/i3c/master/mipi-i3c-hci/dat_v1.c b/drivers/i3c/master/mipi-i3c-hci/dat_v1.c index 783e551a2c85..97bb49ff5b53 100644 --- a/drivers/i3c/master/mipi-i3c-hci/dat_v1.c +++ b/drivers/i3c/master/mipi-i3c-hci/dat_v1.c @@ -160,9 +160,7 @@ static int hci_dat_v1_get_index(struct i3c_hci *hci, u8 dev_addr) unsigned int dat_idx; u32 dat_w0; - for (dat_idx = find_first_bit(hci->DAT_data, hci->DAT_entries); - dat_idx < hci->DAT_entries; - dat_idx = find_next_bit(hci->DAT_data, hci->DAT_entries, dat_idx)) { + for_each_set_bit(dat_idx, hci->DAT_data, hci->DAT_entries) { dat_w0 = dat_w0_read(dat_idx); if (FIELD_GET(DAT_0_DYNAMIC_ADDRESS, dat_w0) == dev_addr) return dat_idx; From 84345c618e1e461519ac8235dca37bab1360a021 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Sun, 12 Dec 2021 20:54:15 -0800 Subject: [PATCH 054/493] Input: goodix - improve gpiod_get() error logging goodix_get_gpio_config() errors are fatal (abort probe()) so log them at KERN_ERR level rather then as debug messages. Signed-off-by: Hans de Goede Link: https://lore.kernel.org/r/20211212124242.81019-4-hdegoede@redhat.com Signed-off-by: Dmitry Torokhov --- drivers/input/touchscreen/goodix.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/input/touchscreen/goodix.c b/drivers/input/touchscreen/goodix.c index 48362bed7335..148f460dcca7 100644 --- a/drivers/input/touchscreen/goodix.c +++ b/drivers/input/touchscreen/goodix.c @@ -954,7 +954,7 @@ retry_get_irq_gpio: if (IS_ERR(gpiod)) { error = PTR_ERR(gpiod); if (error != -EPROBE_DEFER) - dev_dbg(dev, "Failed to get %s GPIO: %d\n", + dev_err(dev, "Failed to get %s GPIO: %d\n", GOODIX_GPIO_INT_NAME, error); return error; } @@ -971,7 +971,7 @@ retry_get_irq_gpio: if (IS_ERR(gpiod)) { error = PTR_ERR(gpiod); if (error != -EPROBE_DEFER) - dev_dbg(dev, "Failed to get %s GPIO: %d\n", + dev_err(dev, "Failed to get %s GPIO: %d\n", GOODIX_GPIO_RST_NAME, error); return error; } From 71f4ecd5ee8463bd47783bb0a37516aee3e0fd38 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Sun, 12 Dec 2021 20:54:29 -0800 Subject: [PATCH 055/493] Input: goodix - 2 small fixes for pen support 2 small fixes for pen support 1. Set the id.vendor field for the pen input_dev 2. Fix a typo in a comment Signed-off-by: Hans de Goede Link: https://lore.kernel.org/r/20211212124242.81019-5-hdegoede@redhat.com Signed-off-by: Dmitry Torokhov --- drivers/input/touchscreen/goodix.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/input/touchscreen/goodix.c b/drivers/input/touchscreen/goodix.c index 148f460dcca7..ba84985b41c2 100644 --- a/drivers/input/touchscreen/goodix.c +++ b/drivers/input/touchscreen/goodix.c @@ -333,6 +333,7 @@ static struct input_dev *goodix_create_pen_input(struct goodix_ts_data *ts) input->name = "Goodix Active Pen"; input->phys = "input/pen"; input->id.bustype = BUS_I2C; + input->id.vendor = 0x0416; if (kstrtou16(ts->id, 10, &input->id.product)) input->id.product = 0x1001; input->id.version = ts->version; @@ -475,7 +476,7 @@ static void goodix_process_events(struct goodix_ts_data *ts) if (touch_num == 1 && (point_data[1] & 0x80)) { goodix_ts_report_pen_down(ts, point_data); goodix_ts_release_keys(ts); - goto sync; /* Release any previousle registered touches */ + goto sync; /* Release any previously registered touches */ } else { goodix_ts_report_pen_up(ts); } From 66d27d848fa6bf613bd716cb2c71769a211a6ffe Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Sun, 12 Dec 2021 21:05:29 -0800 Subject: [PATCH 056/493] Input: silead - add support for EFI-embedded fw using different min/max coordinates Unfortunately, at the time of writing this commit message, we have been unable to get permission from Silead, or from device OEMs, to distribute the necessary Silead firmware files in linux-firmware. On a whole bunch of devices the UEFI BIOS code contains a touchscreen driver, which contains an embedded copy of the firmware. The fw-loader code has a "platform" fallback mechanism, which together with info on the firmware from drivers/platform/x86/touchscreen_dmi.c will use the firmware from the UEFI driver when the firmware is missing from /lib/firmware. This makes the touchscreen work OOTB without users needing to manually download the firmware. The firmware bundled with the original Windows/Android is usually newer then the firmware in the UEFI driver and it is better calibrated. This better calibration can lead to significant differences in the reported min/max coordinates. Add support for a new (optional) "silead,efi-fw-min-max" property which provides a set of alternative min/max values to use for the x/y axis when the EFI embedded firmware is used. The new property is only used on (x86) devices which do not use devicetree, IOW it is not used in actual devicetree files. The devicetree-bindings maintainers have requested properties like these to not be added to the devicetree-bindings, so the new property is deliberately not added to the existing silead devicetree-bindings documentation. Signed-off-by: Hans de Goede Link: https://lore.kernel.org/r/20211122220637.11386-2-hdegoede@redhat.com Signed-off-by: Dmitry Torokhov --- drivers/input/touchscreen/silead.c | 73 ++++++++++++++++++++++++++++-- 1 file changed, 68 insertions(+), 5 deletions(-) diff --git a/drivers/input/touchscreen/silead.c b/drivers/input/touchscreen/silead.c index 1ee760bac0cf..caa25af53e6e 100644 --- a/drivers/input/touchscreen/silead.c +++ b/drivers/input/touchscreen/silead.c @@ -75,6 +75,8 @@ struct silead_ts_data { struct input_mt_pos pos[SILEAD_MAX_FINGERS]; int slots[SILEAD_MAX_FINGERS]; int id[SILEAD_MAX_FINGERS]; + u32 efi_fw_min_max[4]; + bool efi_fw_min_max_set; }; struct silead_fw_data { @@ -82,6 +84,35 @@ struct silead_fw_data { u32 val; }; +static void silead_apply_efi_fw_min_max(struct silead_ts_data *data) +{ + struct input_absinfo *absinfo_x = &data->input->absinfo[ABS_MT_POSITION_X]; + struct input_absinfo *absinfo_y = &data->input->absinfo[ABS_MT_POSITION_Y]; + + if (!data->efi_fw_min_max_set) + return; + + absinfo_x->minimum = data->efi_fw_min_max[0]; + absinfo_x->maximum = data->efi_fw_min_max[1]; + absinfo_y->minimum = data->efi_fw_min_max[2]; + absinfo_y->maximum = data->efi_fw_min_max[3]; + + if (data->prop.invert_x) { + absinfo_x->maximum -= absinfo_x->minimum; + absinfo_x->minimum = 0; + } + + if (data->prop.invert_y) { + absinfo_y->maximum -= absinfo_y->minimum; + absinfo_y->minimum = 0; + } + + if (data->prop.swap_x_y) { + swap(absinfo_x->minimum, absinfo_y->minimum); + swap(absinfo_x->maximum, absinfo_y->maximum); + } +} + static int silead_ts_request_input_dev(struct silead_ts_data *data) { struct device *dev = &data->client->dev; @@ -97,6 +128,7 @@ static int silead_ts_request_input_dev(struct silead_ts_data *data) input_set_abs_params(data->input, ABS_MT_POSITION_X, 0, 4095, 0, 0); input_set_abs_params(data->input, ABS_MT_POSITION_Y, 0, 4095, 0, 0); touchscreen_parse_properties(data->input, true, &data->prop); + silead_apply_efi_fw_min_max(data); input_mt_init_slots(data->input, data->max_fingers, INPUT_MT_DIRECT | INPUT_MT_DROP_UNUSED | @@ -282,17 +314,48 @@ static int silead_ts_load_fw(struct i2c_client *client) { struct device *dev = &client->dev; struct silead_ts_data *data = i2c_get_clientdata(client); - unsigned int fw_size, i; - const struct firmware *fw; + const struct firmware *fw = NULL; struct silead_fw_data *fw_data; + unsigned int fw_size, i; int error; dev_dbg(dev, "Firmware file name: %s", data->fw_name); - error = firmware_request_platform(&fw, data->fw_name, dev); + /* + * Unfortunately, at the time of writing this comment, we have been unable to + * get permission from Silead, or from device OEMs, to distribute the necessary + * Silead firmware files in linux-firmware. + * + * On a whole bunch of devices the UEFI BIOS code contains a touchscreen driver, + * which contains an embedded copy of the firmware. The fw-loader code has a + * "platform" fallback mechanism, which together with info on the firmware + * from drivers/platform/x86/touchscreen_dmi.c will use the firmware from the + * UEFI driver when the firmware is missing from /lib/firmware. This makes the + * touchscreen work OOTB without users needing to manually download the firmware. + * + * The firmware bundled with the original Windows/Android is usually newer then + * the firmware in the UEFI driver and it is better calibrated. This better + * calibration can lead to significant differences in the reported min/max + * coordinates. + * + * To deal with this we first try to load the firmware without "platform" + * fallback. If that fails we retry with "platform" fallback and if that + * succeeds we apply an (optional) set of alternative min/max values from the + * "silead,efi-fw-min-max" property. + */ + error = firmware_request_nowarn(&fw, data->fw_name, dev); if (error) { - dev_err(dev, "Firmware request error %d\n", error); - return error; + error = firmware_request_platform(&fw, data->fw_name, dev); + if (error) { + dev_err(dev, "Firmware request error %d\n", error); + return error; + } + + error = device_property_read_u32_array(dev, "silead,efi-fw-min-max", + data->efi_fw_min_max, + ARRAY_SIZE(data->efi_fw_min_max)); + if (!error) + data->efi_fw_min_max_set = true; } fw_size = fw->size / sizeof(*fw_data); From 046612a3f592044762789ec086feb2cdaaf393f6 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Sun, 12 Dec 2021 21:06:25 -0800 Subject: [PATCH 057/493] Input: silead - add pen support Some Silead touchscreens have support for an active (battery powered) pen, add support for this. So far pen-support has only been seen on X86/ACPI (non devicetree) devs, IOW it is not used in actual devicetree files. The devicetree-bindings maintainers have requested properties like these to not be added to the devicetree-bindings, so the new properties are deliberately not added to the existing silead devicetree-bindings documentation. Signed-off-by: Hans de Goede Link: https://lore.kernel.org/r/20211122220637.11386-3-hdegoede@redhat.com Signed-off-by: Dmitry Torokhov --- drivers/input/touchscreen/silead.c | 99 ++++++++++++++++++++++++++++++ 1 file changed, 99 insertions(+) diff --git a/drivers/input/touchscreen/silead.c b/drivers/input/touchscreen/silead.c index caa25af53e6e..3eef8c01090f 100644 --- a/drivers/input/touchscreen/silead.c +++ b/drivers/input/touchscreen/silead.c @@ -67,6 +67,7 @@ struct silead_ts_data { struct i2c_client *client; struct gpio_desc *gpio_power; struct input_dev *input; + struct input_dev *pen_input; struct regulator_bulk_data regulators[2]; char fw_name[64]; struct touchscreen_properties prop; @@ -77,6 +78,11 @@ struct silead_ts_data { int id[SILEAD_MAX_FINGERS]; u32 efi_fw_min_max[4]; bool efi_fw_min_max_set; + bool pen_supported; + bool pen_down; + u32 pen_x_res; + u32 pen_y_res; + int pen_up_count; }; struct silead_fw_data { @@ -144,6 +150,40 @@ static int silead_ts_request_input_dev(struct silead_ts_data *data) error = input_register_device(data->input); if (error) { dev_err(dev, "Failed to register input device: %d\n", error); + return error; + } + + return 0; +} + +static int silead_ts_request_pen_input_dev(struct silead_ts_data *data) +{ + struct device *dev = &data->client->dev; + int error; + + if (!data->pen_supported) + return 0; + + data->pen_input = devm_input_allocate_device(dev); + if (!data->pen_input) + return -ENOMEM; + + input_set_abs_params(data->pen_input, ABS_X, 0, 4095, 0, 0); + input_set_abs_params(data->pen_input, ABS_Y, 0, 4095, 0, 0); + input_set_capability(data->pen_input, EV_KEY, BTN_TOUCH); + input_set_capability(data->pen_input, EV_KEY, BTN_TOOL_PEN); + set_bit(INPUT_PROP_DIRECT, data->pen_input->propbit); + touchscreen_parse_properties(data->pen_input, false, &data->prop); + input_abs_set_res(data->pen_input, ABS_X, data->pen_x_res); + input_abs_set_res(data->pen_input, ABS_Y, data->pen_y_res); + + data->pen_input->name = SILEAD_TS_NAME " pen"; + data->pen_input->phys = "input/pen"; + data->input->id.bustype = BUS_I2C; + + error = input_register_device(data->pen_input); + if (error) { + dev_err(dev, "Failed to register pen input device: %d\n", error); return error; } @@ -161,6 +201,45 @@ static void silead_ts_set_power(struct i2c_client *client, } } +static bool silead_ts_handle_pen_data(struct silead_ts_data *data, u8 *buf) +{ + u8 *coord = buf + SILEAD_POINT_DATA_LEN; + struct input_mt_pos pos; + + if (!data->pen_supported || buf[2] != 0x00 || buf[3] != 0x00) + return false; + + if (buf[0] == 0x00 && buf[1] == 0x00 && data->pen_down) { + data->pen_up_count++; + if (data->pen_up_count == 6) { + data->pen_down = false; + goto sync; + } + return true; + } + + if (buf[0] == 0x01 && buf[1] == 0x08) { + touchscreen_set_mt_pos(&pos, &data->prop, + get_unaligned_le16(&coord[SILEAD_POINT_X_OFF]) & 0xfff, + get_unaligned_le16(&coord[SILEAD_POINT_Y_OFF]) & 0xfff); + + input_report_abs(data->pen_input, ABS_X, pos.x); + input_report_abs(data->pen_input, ABS_Y, pos.y); + + data->pen_up_count = 0; + data->pen_down = true; + goto sync; + } + + return false; + +sync: + input_report_key(data->pen_input, BTN_TOOL_PEN, data->pen_down); + input_report_key(data->pen_input, BTN_TOUCH, data->pen_down); + input_sync(data->pen_input); + return true; +} + static void silead_ts_read_data(struct i2c_client *client) { struct silead_ts_data *data = i2c_get_clientdata(client); @@ -183,6 +262,9 @@ static void silead_ts_read_data(struct i2c_client *client) buf[0] = data->max_fingers; } + if (silead_ts_handle_pen_data(data, buf)) + goto sync; /* Pen is down, release all previous touches */ + touch_nr = 0; bufp = buf + SILEAD_POINT_DATA_LEN; for (i = 0; i < buf[0]; i++, bufp += SILEAD_POINT_DATA_LEN) { @@ -225,6 +307,7 @@ static void silead_ts_read_data(struct i2c_client *client) data->pos[i].y, data->id[i], data->slots[i]); } +sync: input_mt_sync_frame(input); input_report_key(input, KEY_LEFTMETA, softbutton_pressed); input_sync(input); @@ -356,6 +439,14 @@ static int silead_ts_load_fw(struct i2c_client *client) ARRAY_SIZE(data->efi_fw_min_max)); if (!error) data->efi_fw_min_max_set = true; + + /* The EFI (platform) embedded fw does not have pen support */ + if (data->pen_supported) { + dev_warn(dev, "Warning loading '%s' from filesystem failed, using EFI embedded copy.\n", + data->fw_name); + dev_warn(dev, "Warning pen support is known to be broken in the EFI embedded fw version\n"); + data->pen_supported = false; + } } fw_size = fw->size / sizeof(*fw_data); @@ -513,6 +604,10 @@ static void silead_ts_read_props(struct i2c_client *client) "silead/%s", str); else dev_dbg(dev, "Firmware file name read error. Using default."); + + data->pen_supported = device_property_read_bool(dev, "silead,pen-supported"); + device_property_read_u32(dev, "silead,pen-resolution-x", &data->pen_x_res); + device_property_read_u32(dev, "silead,pen-resolution-y", &data->pen_y_res); } #ifdef CONFIG_ACPI @@ -625,6 +720,10 @@ static int silead_ts_probe(struct i2c_client *client, if (error) return error; + error = silead_ts_request_pen_input_dev(data); + if (error) + return error; + error = devm_request_threaded_irq(dev, client->irq, NULL, silead_ts_threaded_irq_handler, IRQF_ONESHOT, client->name, data); From d3c76a42ecc7c41ddcba1455f4f6631648812df0 Mon Sep 17 00:00:00 2001 From: Julien Massot Date: Tue, 7 Dec 2021 17:58:28 +0100 Subject: [PATCH 058/493] dt-bindings: remoteproc: Add Renesas R-Car Renesas R-Car SoCs may contain a Realtime processor. This patch adds bindings for this remote processor. Signed-off-by: Julien Massot Reviewed-by: Rob Herring Reviewed-by: Geert Uytterhoeven Link: https://lore.kernel.org/r/20211207165829.195537-2-julien.massot@iot.bzh Signed-off-by: Mathieu Poirier --- .../remoteproc/renesas,rcar-rproc.yaml | 65 +++++++++++++++++++ 1 file changed, 65 insertions(+) create mode 100644 Documentation/devicetree/bindings/remoteproc/renesas,rcar-rproc.yaml diff --git a/Documentation/devicetree/bindings/remoteproc/renesas,rcar-rproc.yaml b/Documentation/devicetree/bindings/remoteproc/renesas,rcar-rproc.yaml new file mode 100644 index 000000000000..a7d25fa920e5 --- /dev/null +++ b/Documentation/devicetree/bindings/remoteproc/renesas,rcar-rproc.yaml @@ -0,0 +1,65 @@ +# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause) +%YAML 1.2 +--- +$id: "http://devicetree.org/schemas/remoteproc/renesas,rcar-rproc.yaml#" +$schema: "http://devicetree.org/meta-schemas/core.yaml#" + +title: Renesas R-Car remote processor controller bindings + +maintainers: + - Julien Massot + +description: | + This document defines the bindings for the remoteproc component that loads and + boots firmwares on the Renesas R-Car family chipset. + R-Car gen3 family may have a realtime processor, this processor shares peripheral + and RAM with the host processor with the same address map. + +properties: + compatible: + const: renesas,rcar-cr7 + + resets: + maxItems: 1 + + power-domains: + maxItems: 1 + + memory-region: + description: + List of phandles to the reserved memory regions associated with the + remoteproc device. This is variable and describes the memories shared with + the remote processor (e.g. remoteproc firmware and carveouts, rpmsg + vrings, ...). + (see ../reserved-memory/reserved-memory.yaml) + +required: + - compatible + - resets + - memory-region + - power-domains + +additionalProperties: false + +examples: + - | + #include + #include + reserved-memory { + #address-cells = <2>; + #size-cells = <2>; + + cr7_ram: cr7_ram@40040000 { + no-map; + reg = <0x0 0x40040000 0x0 0x1fc0000>; + }; + }; + + cr7_rproc: cr7 { + compatible = "renesas,rcar-cr7"; + memory-region = <&cr7_ram>; + power-domains = <&sysc R8A7795_PD_CR7>; + resets = <&cpg 222>; + }; + +... From 285892a74f1370a12249f765c6a4e3b16194852e Mon Sep 17 00:00:00 2001 From: Julien Massot Date: Tue, 7 Dec 2021 17:58:29 +0100 Subject: [PATCH 059/493] remoteproc: Add Renesas rcar driver Renesas Gen3 platform includes a Cortex-r7 processor. Both: the application cores (A5x) and the realtime core (CR7) share access to the RAM and devices with the same address map, so device addresses are equal to the Linux physical addresses. In order to initialize this remote processor we need to: - power on the realtime core - put the firmware in a RAM area - set the boot address for this firmware (reset vector) - Deassert the reset This initial driver allows to start and stop the Cortex R7 processor. Signed-off-by: Julien Massot Link: https://lore.kernel.org/r/20211207165829.195537-3-julien.massot@iot.bzh Signed-off-by: Mathieu Poirier --- drivers/remoteproc/Kconfig | 11 ++ drivers/remoteproc/Makefile | 1 + drivers/remoteproc/rcar_rproc.c | 224 ++++++++++++++++++++++++++++++++ 3 files changed, 236 insertions(+) create mode 100644 drivers/remoteproc/rcar_rproc.c diff --git a/drivers/remoteproc/Kconfig b/drivers/remoteproc/Kconfig index f2e961f998ca..3ddd426fc969 100644 --- a/drivers/remoteproc/Kconfig +++ b/drivers/remoteproc/Kconfig @@ -283,6 +283,17 @@ config QCOM_WCNSS_PIL verified and booted with the help of the Peripheral Authentication System (PAS) in TrustZone. +config RCAR_REMOTEPROC + tristate "Renesas R-Car Gen3 remoteproc support" + depends on ARCH_RENESAS || COMPILE_TEST + help + Say y here to support R-Car realtime processor via the + remote processor framework. An ELF firmware can be loaded + thanks to sysfs remoteproc entries. The remote processor + can be started and stopped. + This can be either built-in or a loadable module. + If compiled as module (M), the module name is rcar_rproc. + config ST_REMOTEPROC tristate "ST remoteproc support" depends on ARCH_STI diff --git a/drivers/remoteproc/Makefile b/drivers/remoteproc/Makefile index 0ac256b6c977..5478c7cb9e07 100644 --- a/drivers/remoteproc/Makefile +++ b/drivers/remoteproc/Makefile @@ -32,6 +32,7 @@ obj-$(CONFIG_QCOM_SYSMON) += qcom_sysmon.o obj-$(CONFIG_QCOM_WCNSS_PIL) += qcom_wcnss_pil.o qcom_wcnss_pil-y += qcom_wcnss.o qcom_wcnss_pil-y += qcom_wcnss_iris.o +obj-$(CONFIG_RCAR_REMOTEPROC) += rcar_rproc.o obj-$(CONFIG_ST_REMOTEPROC) += st_remoteproc.o obj-$(CONFIG_ST_SLIM_REMOTEPROC) += st_slim_rproc.o obj-$(CONFIG_STM32_RPROC) += stm32_rproc.o diff --git a/drivers/remoteproc/rcar_rproc.c b/drivers/remoteproc/rcar_rproc.c new file mode 100644 index 000000000000..34fd867f9f8c --- /dev/null +++ b/drivers/remoteproc/rcar_rproc.c @@ -0,0 +1,224 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) IoT.bzh 2021 + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "remoteproc_internal.h" + +struct rcar_rproc { + struct reset_control *rst; +}; + +static int rcar_rproc_mem_alloc(struct rproc *rproc, + struct rproc_mem_entry *mem) +{ + struct device *dev = &rproc->dev; + void *va; + + dev_dbg(dev, "map memory: %pa+%zx\n", &mem->dma, mem->len); + va = ioremap_wc(mem->dma, mem->len); + if (!va) { + dev_err(dev, "Unable to map memory region: %pa+%zx\n", + &mem->dma, mem->len); + return -ENOMEM; + } + + /* Update memory entry va */ + mem->va = va; + + return 0; +} + +static int rcar_rproc_mem_release(struct rproc *rproc, + struct rproc_mem_entry *mem) +{ + dev_dbg(&rproc->dev, "unmap memory: %pa\n", &mem->dma); + iounmap(mem->va); + + return 0; +} + +static int rcar_rproc_prepare(struct rproc *rproc) +{ + struct device *dev = rproc->dev.parent; + struct device_node *np = dev->of_node; + struct of_phandle_iterator it; + struct rproc_mem_entry *mem; + struct reserved_mem *rmem; + u32 da; + + /* Register associated reserved memory regions */ + of_phandle_iterator_init(&it, np, "memory-region", NULL, 0); + while (of_phandle_iterator_next(&it) == 0) { + + rmem = of_reserved_mem_lookup(it.node); + if (!rmem) { + dev_err(&rproc->dev, + "unable to acquire memory-region\n"); + return -EINVAL; + } + + if (rmem->base > U32_MAX) + return -EINVAL; + + /* No need to translate pa to da, R-Car use same map */ + da = rmem->base; + mem = rproc_mem_entry_init(dev, NULL, + rmem->base, + rmem->size, da, + rcar_rproc_mem_alloc, + rcar_rproc_mem_release, + it.node->name); + + if (!mem) + return -ENOMEM; + + rproc_add_carveout(rproc, mem); + } + + return 0; +} + +static int rcar_rproc_parse_fw(struct rproc *rproc, const struct firmware *fw) +{ + int ret; + + ret = rproc_elf_load_rsc_table(rproc, fw); + if (ret) + dev_info(&rproc->dev, "No resource table in elf\n"); + + return 0; +} + +static int rcar_rproc_start(struct rproc *rproc) +{ + struct rcar_rproc *priv = rproc->priv; + int err; + + if (!rproc->bootaddr) + return -EINVAL; + + err = rcar_rst_set_rproc_boot_addr(rproc->bootaddr); + if (err) { + dev_err(&rproc->dev, "failed to set rproc boot addr\n"); + return err; + } + + err = reset_control_deassert(priv->rst); + if (err) + dev_err(&rproc->dev, "failed to deassert reset\n"); + + return err; +} + +static int rcar_rproc_stop(struct rproc *rproc) +{ + struct rcar_rproc *priv = rproc->priv; + int err; + + err = reset_control_assert(priv->rst); + if (err) + dev_err(&rproc->dev, "failed to assert reset\n"); + + return err; +} + +static struct rproc_ops rcar_rproc_ops = { + .prepare = rcar_rproc_prepare, + .start = rcar_rproc_start, + .stop = rcar_rproc_stop, + .load = rproc_elf_load_segments, + .parse_fw = rcar_rproc_parse_fw, + .find_loaded_rsc_table = rproc_elf_find_loaded_rsc_table, + .sanity_check = rproc_elf_sanity_check, + .get_boot_addr = rproc_elf_get_boot_addr, + +}; + +static int rcar_rproc_probe(struct platform_device *pdev) +{ + struct device *dev = &pdev->dev; + struct device_node *np = dev->of_node; + struct rcar_rproc *priv; + struct rproc *rproc; + int ret; + + rproc = devm_rproc_alloc(dev, np->name, &rcar_rproc_ops, + NULL, sizeof(*priv)); + if (!rproc) + return -ENOMEM; + + priv = rproc->priv; + + priv->rst = devm_reset_control_get_exclusive(dev, NULL); + if (IS_ERR(priv->rst)) { + ret = PTR_ERR(priv->rst); + dev_err_probe(dev, ret, "fail to acquire rproc reset\n"); + return ret;; + } + + pm_runtime_enable(dev); + ret = pm_runtime_get_sync(dev); + if (ret) { + dev_err(dev, "failed to power up\n"); + return ret; + } + + dev_set_drvdata(dev, rproc); + + /* Manually start the rproc */ + rproc->auto_boot = false; + + ret = devm_rproc_add(dev, rproc); + if (ret) { + dev_err(dev, "rproc_add failed\n"); + goto pm_disable; + } + + return 0; + +pm_disable: + pm_runtime_disable(dev); + + return ret; +} + +static int rcar_rproc_remove(struct platform_device *pdev) +{ + struct device *dev = &pdev->dev; + + pm_runtime_disable(dev); + + return 0; +} + +static const struct of_device_id rcar_rproc_of_match[] = { + { .compatible = "renesas,rcar-cr7" }, + {}, +}; + +MODULE_DEVICE_TABLE(of, rcar_rproc_of_match); + +static struct platform_driver rcar_rproc_driver = { + .probe = rcar_rproc_probe, + .remove = rcar_rproc_remove, + .driver = { + .name = "rcar-rproc", + .of_match_table = rcar_rproc_of_match, + }, +}; + +module_platform_driver(rcar_rproc_driver); + +MODULE_LICENSE("GPL v2"); +MODULE_DESCRIPTION("Renesas R-Car Gen3 remote processor control driver"); +MODULE_AUTHOR("Julien Massot "); From fdc12231d885119cc2e2b4f3e0fbba3155f37a56 Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Tue, 16 Nov 2021 22:54:54 -0800 Subject: [PATCH 060/493] remoteproc: qcom: pil_info: Don't memcpy_toio more than is provided If the string passed into qcom_pil_info_store() isn't as long as PIL_RELOC_NAME_LEN we'll try to copy the string assuming the length is PIL_RELOC_NAME_LEN to the io space and go beyond the bounds of the string. Let's only copy as many byes as the string is long, ignoring the NUL terminator. This fixes the following KASAN error: BUG: KASAN: global-out-of-bounds in __memcpy_toio+0x124/0x140 Read of size 1 at addr ffffffd35086e386 by task rmtfs/2392 CPU: 2 PID: 2392 Comm: rmtfs Tainted: G W 5.16.0-rc1-lockdep+ #10 Hardware name: Google Lazor (rev3+) with KB Backlight (DT) Call trace: dump_backtrace+0x0/0x410 show_stack+0x24/0x30 dump_stack_lvl+0x7c/0xa0 print_address_description+0x78/0x2bc kasan_report+0x160/0x1a0 __asan_report_load1_noabort+0x44/0x50 __memcpy_toio+0x124/0x140 qcom_pil_info_store+0x298/0x358 [qcom_pil_info] q6v5_start+0xdf0/0x12e0 [qcom_q6v5_mss] rproc_start+0x178/0x3a0 rproc_boot+0x5f0/0xb90 state_store+0x78/0x1bc dev_attr_store+0x70/0x90 sysfs_kf_write+0xf4/0x118 kernfs_fop_write_iter+0x208/0x300 vfs_write+0x55c/0x804 ksys_pwrite64+0xc8/0x134 __arm64_compat_sys_aarch32_pwrite64+0xc4/0xdc invoke_syscall+0x78/0x20c el0_svc_common+0x11c/0x1f0 do_el0_svc_compat+0x50/0x60 el0_svc_compat+0x5c/0xec el0t_32_sync_handler+0xc0/0xf0 el0t_32_sync+0x1a4/0x1a8 The buggy address belongs to the variable: .str.59+0x6/0xffffffffffffec80 [qcom_q6v5_mss] Memory state around the buggy address: ffffffd35086e280: 00 00 00 00 02 f9 f9 f9 f9 f9 f9 f9 00 00 00 00 ffffffd35086e300: 00 02 f9 f9 f9 f9 f9 f9 00 00 00 06 f9 f9 f9 f9 >ffffffd35086e380: 06 f9 f9 f9 05 f9 f9 f9 00 00 00 00 00 06 f9 f9 ^ ffffffd35086e400: f9 f9 f9 f9 01 f9 f9 f9 04 f9 f9 f9 00 00 01 f9 ffffffd35086e480: f9 f9 f9 f9 00 00 00 00 00 00 00 01 f9 f9 f9 f9 Fixes: 549b67da660d ("remoteproc: qcom: Introduce helper to store pil info in IMEM") Signed-off-by: Stephen Boyd Reviewed-by: Bjorn Andersson Signed-off-by: Bjorn Andersson Link: https://lore.kernel.org/r/20211117065454.4142936-1-swboyd@chromium.org --- drivers/remoteproc/qcom_pil_info.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/remoteproc/qcom_pil_info.c b/drivers/remoteproc/qcom_pil_info.c index 7c007dd7b200..aca21560e20b 100644 --- a/drivers/remoteproc/qcom_pil_info.c +++ b/drivers/remoteproc/qcom_pil_info.c @@ -104,7 +104,7 @@ int qcom_pil_info_store(const char *image, phys_addr_t base, size_t size) return -ENOMEM; found_unused: - memcpy_toio(entry, image, PIL_RELOC_NAME_LEN); + memcpy_toio(entry, image, strnlen(image, PIL_RELOC_NAME_LEN)); found_existing: /* Use two writel() as base is only aligned to 4 bytes on odd entries */ writel(base, entry + PIL_RELOC_NAME_LEN); From da87976921bba27199ee4df94081ac10fa0cf76f Mon Sep 17 00:00:00 2001 From: Peng Fan Date: Wed, 2 Jun 2021 14:42:06 +0800 Subject: [PATCH 061/493] remoteproc: imx_rproc: correct firmware reload ENABLE_M4 should be set to 1 when loading code to TCM, otherwise you will not able to replace the firmware after you stop m4. Besides ENABLE_M4, we still need set SW_M4C_RST, because this bit will be automatically set with SW_M4C_NON_SCLR_RST set. Reviewed-by: Mathieu Poirier Signed-off-by: Peng Fan Signed-off-by: Bjorn Andersson Link: https://lore.kernel.org/r/20210602064206.27004-1-peng.fan@oss.nxp.com --- drivers/remoteproc/imx_rproc.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/remoteproc/imx_rproc.c b/drivers/remoteproc/imx_rproc.c index 0a45bc0d3f73..7a096f1891e6 100644 --- a/drivers/remoteproc/imx_rproc.c +++ b/drivers/remoteproc/imx_rproc.c @@ -34,7 +34,8 @@ #define IMX7D_M4_START (IMX7D_ENABLE_M4 | IMX7D_SW_M4P_RST \ | IMX7D_SW_M4C_RST) -#define IMX7D_M4_STOP IMX7D_SW_M4C_NON_SCLR_RST +#define IMX7D_M4_STOP (IMX7D_ENABLE_M4 | IMX7D_SW_M4C_RST | \ + IMX7D_SW_M4C_NON_SCLR_RST) /* Address: 0x020D8000 */ #define IMX6SX_SRC_SCR 0x00 @@ -45,7 +46,8 @@ #define IMX6SX_M4_START (IMX6SX_ENABLE_M4 | IMX6SX_SW_M4P_RST \ | IMX6SX_SW_M4C_RST) -#define IMX6SX_M4_STOP IMX6SX_SW_M4C_NON_SCLR_RST +#define IMX6SX_M4_STOP (IMX6SX_ENABLE_M4 | IMX6SX_SW_M4C_RST | \ + IMX6SX_SW_M4C_NON_SCLR_RST) #define IMX6SX_M4_RST_MASK (IMX6SX_ENABLE_M4 | IMX6SX_SW_M4P_RST \ | IMX6SX_SW_M4C_NON_SCLR_RST \ | IMX6SX_SW_M4C_RST) @@ -684,7 +686,7 @@ static int imx_rproc_detect_mode(struct imx_rproc *priv) return ret; } - if (!(val & dcfg->src_stop)) + if ((val & dcfg->src_mask) != dcfg->src_stop) priv->rproc->state = RPROC_DETACHED; return 0; From dd585d9bfbf06fd08a6326c82978be1f06e7d1bd Mon Sep 17 00:00:00 2001 From: Sibi Sankar Date: Fri, 25 Jun 2021 00:03:25 +0530 Subject: [PATCH 062/493] remoteproc: qcom: pas: Add missing power-domain "mxc" for CDSP Add missing power-domain "mxc" required by CDSP PAS remoteproc on SM8350 SoC. Fixes: e8b4e9a21af7 ("remoteproc: qcom: pas: Add SM8350 PAS remoteprocs") Signed-off-by: Sibi Sankar Cc: stable@vger.kernel.org Tested-by: Bjorn Andersson Signed-off-by: Bjorn Andersson Link: https://lore.kernel.org/r/1624559605-29847-1-git-send-email-sibis@codeaurora.org --- drivers/remoteproc/qcom_q6v5_pas.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/remoteproc/qcom_q6v5_pas.c b/drivers/remoteproc/qcom_q6v5_pas.c index 03857dc9cdc1..120c16b14223 100644 --- a/drivers/remoteproc/qcom_q6v5_pas.c +++ b/drivers/remoteproc/qcom_q6v5_pas.c @@ -652,6 +652,7 @@ static const struct adsp_data sm8350_cdsp_resource = { .auto_boot = true, .proxy_pd_names = (char*[]){ "cx", + "mxc", NULL }, .load_state = "cdsp", From a15d36f04b9e118ef5c37782a91bd1f52877f65c Mon Sep 17 00:00:00 2001 From: Luca Weiss Date: Mon, 13 Dec 2021 09:22:01 +0100 Subject: [PATCH 063/493] dt-bindings: remoteproc: qcom: pas: Add SM6350 adsp, cdsp & mpss Add compatibles for the remoteprocs found in SM6350. Signed-off-by: Luca Weiss Acked-by: Konrad Dybcio Signed-off-by: Bjorn Andersson Link: https://lore.kernel.org/r/20211213082208.21492-2-luca.weiss@fairphone.com --- .../bindings/remoteproc/qcom,adsp.yaml | 28 +++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/Documentation/devicetree/bindings/remoteproc/qcom,adsp.yaml b/Documentation/devicetree/bindings/remoteproc/qcom,adsp.yaml index 63e06d93bca3..c635c181d2c2 100644 --- a/Documentation/devicetree/bindings/remoteproc/qcom,adsp.yaml +++ b/Documentation/devicetree/bindings/remoteproc/qcom,adsp.yaml @@ -33,6 +33,9 @@ properties: - qcom,sdm845-adsp-pas - qcom,sdm845-cdsp-pas - qcom,sdx55-mpss-pas + - qcom,sm6350-adsp-pas + - qcom,sm6350-cdsp-pas + - qcom,sm6350-mpss-pas - qcom,sm8150-adsp-pas - qcom,sm8150-cdsp-pas - qcom,sm8150-mpss-pas @@ -158,6 +161,9 @@ allOf: - qcom,sc8180x-mpss-pas - qcom,sdm845-adsp-pas - qcom,sdm845-cdsp-pas + - qcom,sm6350-adsp-pas + - qcom,sm6350-cdsp-pas + - qcom,sm6350-mpss-pas - qcom,sm8150-adsp-pas - qcom,sm8150-cdsp-pas - qcom,sm8150-mpss-pas @@ -266,6 +272,8 @@ allOf: - qcom,sc8180x-cdsp-pas - qcom,sdm845-adsp-pas - qcom,sdm845-cdsp-pas + - qcom,sm6350-adsp-pas + - qcom,sm6350-cdsp-pas - qcom,sm8150-adsp-pas - qcom,sm8150-cdsp-pas - qcom,sm8150-slpi-pas @@ -301,6 +309,7 @@ allOf: - qcom,sc7280-mpss-pas - qcom,sc8180x-mpss-pas - qcom,sdx55-mpss-pas + - qcom,sm6350-mpss-pas - qcom,sm8150-mpss-pas - qcom,sm8350-mpss-pas then: @@ -385,6 +394,23 @@ allOf: - const: mx - const: mss + - if: + properties: + compatible: + contains: + enum: + - qcom,sm6350-cdsp-pas + then: + properties: + power-domains: + items: + - description: CX power domain + - description: MX power domain + power-domain-names: + items: + - const: cx + - const: mx + - if: properties: compatible: @@ -405,6 +431,7 @@ allOf: enum: - qcom,sc7280-mpss-pas - qcom,sdx55-mpss-pas + - qcom,sm6350-mpss-pas - qcom,sm8150-mpss-pas - qcom,sm8350-mpss-pas then: @@ -425,6 +452,7 @@ allOf: enum: - qcom,sc8180x-adsp-pas - qcom,sc8180x-cdsp-pas + - qcom,sm6350-adsp-pas - qcom,sm8150-slpi-pas - qcom,sm8250-adsp-pas - qcom,sm8250-slpi-pas From 42a3f554d81ede895e12fd9d22a112beaf3cbb2e Mon Sep 17 00:00:00 2001 From: Luca Weiss Date: Mon, 13 Dec 2021 09:22:02 +0100 Subject: [PATCH 064/493] remoteproc: qcom: pas: Add SM6350 MPSS support Add a config for the MPSS present on SM6350. Signed-off-by: Luca Weiss Reviewed-by: Konrad Dybcio Signed-off-by: Bjorn Andersson Link: https://lore.kernel.org/r/20211213082208.21492-3-luca.weiss@fairphone.com --- drivers/remoteproc/qcom_q6v5_pas.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/remoteproc/qcom_q6v5_pas.c b/drivers/remoteproc/qcom_q6v5_pas.c index 120c16b14223..3b1236502eb3 100644 --- a/drivers/remoteproc/qcom_q6v5_pas.c +++ b/drivers/remoteproc/qcom_q6v5_pas.c @@ -805,6 +805,7 @@ static const struct of_device_id adsp_of_match[] = { { .compatible = "qcom,sdm845-adsp-pas", .data = &sdm845_adsp_resource_init}, { .compatible = "qcom,sdm845-cdsp-pas", .data = &sdm845_cdsp_resource_init}, { .compatible = "qcom,sdx55-mpss-pas", .data = &sdx55_mpss_resource}, + { .compatible = "qcom,sm6350-mpss-pas", .data = &mpss_resource_init}, { .compatible = "qcom,sm8150-adsp-pas", .data = &sm8150_adsp_resource}, { .compatible = "qcom,sm8150-cdsp-pas", .data = &sm8150_cdsp_resource}, { .compatible = "qcom,sm8150-mpss-pas", .data = &mpss_resource_init}, From bfd75aefe32c63608c210227c662ba65541b8376 Mon Sep 17 00:00:00 2001 From: Luca Weiss Date: Mon, 13 Dec 2021 09:22:04 +0100 Subject: [PATCH 065/493] remoteproc: qcom: pas: Add SM6350 ADSP support Add a config for the ADSP present on SM6350. Signed-off-by: Luca Weiss Reviewed-by: Konrad Dybcio Signed-off-by: Bjorn Andersson Link: https://lore.kernel.org/r/20211213082208.21492-5-luca.weiss@fairphone.com --- drivers/remoteproc/qcom_q6v5_pas.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/drivers/remoteproc/qcom_q6v5_pas.c b/drivers/remoteproc/qcom_q6v5_pas.c index 3b1236502eb3..3be6b943c1fd 100644 --- a/drivers/remoteproc/qcom_q6v5_pas.c +++ b/drivers/remoteproc/qcom_q6v5_pas.c @@ -524,6 +524,23 @@ static const struct adsp_data sdm845_adsp_resource_init = { .ssctl_id = 0x14, }; +static const struct adsp_data sm6350_adsp_resource = { + .crash_reason_smem = 423, + .firmware_name = "adsp.mdt", + .pas_id = 1, + .has_aggre2_clk = false, + .auto_boot = true, + .proxy_pd_names = (char*[]){ + "lcx", + "lmx", + NULL + }, + .load_state = "adsp", + .ssr_name = "lpass", + .sysmon_name = "adsp", + .ssctl_id = 0x14, +}; + static const struct adsp_data sm8150_adsp_resource = { .crash_reason_smem = 423, .firmware_name = "adsp.mdt", @@ -805,6 +822,7 @@ static const struct of_device_id adsp_of_match[] = { { .compatible = "qcom,sdm845-adsp-pas", .data = &sdm845_adsp_resource_init}, { .compatible = "qcom,sdm845-cdsp-pas", .data = &sdm845_cdsp_resource_init}, { .compatible = "qcom,sdx55-mpss-pas", .data = &sdx55_mpss_resource}, + { .compatible = "qcom,sm6350-adsp-pas", .data = &sm6350_adsp_resource}, { .compatible = "qcom,sm6350-mpss-pas", .data = &mpss_resource_init}, { .compatible = "qcom,sm8150-adsp-pas", .data = &sm8150_adsp_resource}, { .compatible = "qcom,sm8150-cdsp-pas", .data = &sm8150_cdsp_resource}, From 3bc0d1f9ef54cc3500898796b150078e6918ee21 Mon Sep 17 00:00:00 2001 From: Luca Weiss Date: Mon, 13 Dec 2021 09:22:06 +0100 Subject: [PATCH 066/493] remoteproc: qcom: pas: Add SM6350 CDSP support Add a config for the CDSP present on SM6350. Signed-off-by: Luca Weiss Reviewed-by: Konrad Dybcio Signed-off-by: Bjorn Andersson Link: https://lore.kernel.org/r/20211213082208.21492-7-luca.weiss@fairphone.com --- drivers/remoteproc/qcom_q6v5_pas.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/drivers/remoteproc/qcom_q6v5_pas.c b/drivers/remoteproc/qcom_q6v5_pas.c index 3be6b943c1fd..184bb7cdf95a 100644 --- a/drivers/remoteproc/qcom_q6v5_pas.c +++ b/drivers/remoteproc/qcom_q6v5_pas.c @@ -629,6 +629,23 @@ static const struct adsp_data sdm845_cdsp_resource_init = { .ssctl_id = 0x17, }; +static const struct adsp_data sm6350_cdsp_resource = { + .crash_reason_smem = 601, + .firmware_name = "cdsp.mdt", + .pas_id = 18, + .has_aggre2_clk = false, + .auto_boot = true, + .proxy_pd_names = (char*[]){ + "cx", + "mx", + NULL + }, + .load_state = "cdsp", + .ssr_name = "cdsp", + .sysmon_name = "cdsp", + .ssctl_id = 0x17, +}; + static const struct adsp_data sm8150_cdsp_resource = { .crash_reason_smem = 601, .firmware_name = "cdsp.mdt", @@ -823,6 +840,7 @@ static const struct of_device_id adsp_of_match[] = { { .compatible = "qcom,sdm845-cdsp-pas", .data = &sdm845_cdsp_resource_init}, { .compatible = "qcom,sdx55-mpss-pas", .data = &sdx55_mpss_resource}, { .compatible = "qcom,sm6350-adsp-pas", .data = &sm6350_adsp_resource}, + { .compatible = "qcom,sm6350-cdsp-pas", .data = &sm6350_cdsp_resource}, { .compatible = "qcom,sm6350-mpss-pas", .data = &mpss_resource_init}, { .compatible = "qcom,sm8150-adsp-pas", .data = &sm8150_adsp_resource}, { .compatible = "qcom,sm8150-cdsp-pas", .data = &sm8150_cdsp_resource}, From f0a29c9647ff8bbb424641f79bc1894e83dec218 Mon Sep 17 00:00:00 2001 From: Sohaib Mohamed Date: Fri, 19 Nov 2021 08:14:08 +0200 Subject: [PATCH 067/493] perf bench: Use unbuffered output when pipe/tee'ing to a file The output of 'perf bench' gets buffered when I pipe it to a file or to tee, in such a way that I can see it only at the end. E.g. $ perf bench internals synthesize -t < output comes out fine after each test run > $ perf bench internals synthesize -t | tee file.txt < output comes out only at the end of all tests > This patch resolves this issue for 'bench' and 'test' subcommands. See, also: $ perf bench mem all | tee file.txt $ perf bench sched all | tee file.txt $ perf bench internals all -t | tee file.txt $ perf bench internals all | tee file.txt Committer testing: It really gets staggered, i.e. outputs in bursts, when the buffer fills up and has to be drained to make up space for more output. Suggested-by: Riccardo Mancini Signed-off-by: Sohaib Mohamed Tested-by: Arnaldo Carvalho de Melo Acked-by: Jiri Olsa Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Fabian Hemmer Cc: Ian Rogers Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lore.kernel.org/lkml/20211119061409.78004-1-sohaib.amhmd@gmail.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-bench.c | 5 +++-- tools/perf/tests/builtin-test.c | 3 +++ 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/tools/perf/builtin-bench.c b/tools/perf/builtin-bench.c index d0895162c2ba..d291f3a8af5f 100644 --- a/tools/perf/builtin-bench.c +++ b/tools/perf/builtin-bench.c @@ -226,7 +226,6 @@ static void run_collection(struct collection *coll) if (!bench->fn) break; printf("# Running %s/%s benchmark...\n", coll->name, bench->name); - fflush(stdout); argv[1] = bench->name; run_bench(coll->name, bench->name, bench->fn, 1, argv); @@ -247,6 +246,9 @@ int cmd_bench(int argc, const char **argv) struct collection *coll; int ret = 0; + /* Unbuffered output */ + setvbuf(stdout, NULL, _IONBF, 0); + if (argc < 2) { /* No collection specified. */ print_usage(); @@ -300,7 +302,6 @@ int cmd_bench(int argc, const char **argv) if (bench_format == BENCH_FORMAT_DEFAULT) printf("# Running '%s/%s' benchmark:\n", coll->name, bench->name); - fflush(stdout); ret = run_bench(coll->name, bench->name, bench->fn, argc-1, argv+1); goto end; } diff --git a/tools/perf/tests/builtin-test.c b/tools/perf/tests/builtin-test.c index f1e6d2a3a578..1fb9f2a11d63 100644 --- a/tools/perf/tests/builtin-test.c +++ b/tools/perf/tests/builtin-test.c @@ -607,6 +607,9 @@ int cmd_test(int argc, const char **argv) if (ret < 0) return ret; + /* Unbuffered output */ + setvbuf(stdout, NULL, _IONBF, 0); + argc = parse_options_subcommand(argc, argv, test_options, test_subcommands, test_usage, 0); if (argc >= 1 && !strcmp(argv[0], "list")) return perf_test__list(argc - 1, argv + 1); From b0fde9c6e291e528878ea3713997777713fe44c6 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Wed, 1 Dec 2021 14:08:55 -0800 Subject: [PATCH 068/493] perf arm-spe: Add SPE total latency as PERF_SAMPLE_WEIGHT Use total latency info in the SPE counter packet as sample weight so that we can see it in local_weight and (global) weight sort keys. Maybe we can use PERF_SAMPLE_WEIGHT_STRUCT to support ins_lat as well but I'm not sure which latency it matches. So just adding total latency first. Reviewed-by: Leo Yan Signed-off-by: Namhyung Kim Cc: Andi Kleen Cc: German Gomez Cc: Ian Rogers Cc: James Clark Cc: Jiri Olsa Cc: Mark Rutland Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lore.kernel.org/lkml/20211201220855.1260688-1-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/arm-spe-decoder/arm-spe-decoder.c | 2 ++ tools/perf/util/arm-spe-decoder/arm-spe-decoder.h | 1 + tools/perf/util/arm-spe.c | 5 ++++- 3 files changed, 7 insertions(+), 1 deletion(-) diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c index 3fc528c9270c..5e390a1a79ab 100644 --- a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c +++ b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c @@ -179,6 +179,8 @@ static int arm_spe_read_record(struct arm_spe_decoder *decoder) decoder->record.phys_addr = ip; break; case ARM_SPE_COUNTER: + if (idx == SPE_CNT_PKT_HDR_INDEX_TOTAL_LAT) + decoder->record.latency = payload; break; case ARM_SPE_CONTEXT: decoder->record.context_id = payload; diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h index 46a8556a9e95..69b31084d6be 100644 --- a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h +++ b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h @@ -33,6 +33,7 @@ struct arm_spe_record { enum arm_spe_sample_type type; int err; u32 op; + u32 latency; u64 from_ip; u64 to_ip; u64 timestamp; diff --git a/tools/perf/util/arm-spe.c b/tools/perf/util/arm-spe.c index fccac06b573a..8a3828f86901 100644 --- a/tools/perf/util/arm-spe.c +++ b/tools/perf/util/arm-spe.c @@ -330,6 +330,7 @@ static int arm_spe__synth_mem_sample(struct arm_spe_queue *speq, sample.addr = record->virt_addr; sample.phys_addr = record->phys_addr; sample.data_src = data_src; + sample.weight = record->latency; return arm_spe_deliver_synth_event(spe, speq, event, &sample); } @@ -347,6 +348,7 @@ static int arm_spe__synth_branch_sample(struct arm_spe_queue *speq, sample.id = spe_events_id; sample.stream_id = spe_events_id; sample.addr = record->to_ip; + sample.weight = record->latency; return arm_spe_deliver_synth_event(spe, speq, event, &sample); } @@ -993,7 +995,8 @@ arm_spe_synth_events(struct arm_spe *spe, struct perf_session *session) attr.type = PERF_TYPE_HARDWARE; attr.sample_type = evsel->core.attr.sample_type & PERF_SAMPLE_MASK; attr.sample_type |= PERF_SAMPLE_IP | PERF_SAMPLE_TID | - PERF_SAMPLE_PERIOD | PERF_SAMPLE_DATA_SRC; + PERF_SAMPLE_PERIOD | PERF_SAMPLE_DATA_SRC | + PERF_SAMPLE_WEIGHT; if (spe->timeless_decoding) attr.sample_type &= ~(u64)PERF_SAMPLE_TIME; else From 888569dbcd80a0bb87371e9fb0fa3802c7aa9b14 Mon Sep 17 00:00:00 2001 From: Salvatore Bonaccorso Date: Tue, 23 Nov 2021 22:18:21 +0100 Subject: [PATCH 069/493] perf dlfilter: Drop unused variable MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Compiling tools/perf/dlfilters/dlfilter-test-api-v0.c result in: checking for stdlib.h... dlfilters/dlfilter-test-api-v0.c: In function ‘filter_event’: dlfilters/dlfilter-test-api-v0.c:311:29: warning: unused variable ‘d’ [-Wunused-variable] 311 | struct filter_data *d = data; | So remove the variable now. Reviewed-by: German Gomez Signed-off-by: Salvatore Bonaccorso Acked-by: Adrian Hunter Cc: Alexander Shishkin Cc: Ingo Molnar Cc: Jiri Olsa Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Link: https://lore.kernel.org/r/20211123211821.132924-1-carnil@debian.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/dlfilters/dlfilter-test-api-v0.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/tools/perf/dlfilters/dlfilter-test-api-v0.c b/tools/perf/dlfilters/dlfilter-test-api-v0.c index 7565a1852c74..b17eb52a0694 100644 --- a/tools/perf/dlfilters/dlfilter-test-api-v0.c +++ b/tools/perf/dlfilters/dlfilter-test-api-v0.c @@ -308,8 +308,6 @@ int filter_event_early(void *data, const struct perf_dlfilter_sample *sample, vo int filter_event(void *data, const struct perf_dlfilter_sample *sample, void *ctx) { - struct filter_data *d = data; - pr_debug("%s API\n", __func__); return do_checks(data, sample, ctx, false); From 3987d65f45ed51a4650e911baa68f9b6ed4623cb Mon Sep 17 00:00:00 2001 From: Andrew Kilroy Date: Fri, 10 Dec 2021 12:37:04 +0000 Subject: [PATCH 070/493] perf vendor events: For the Arm Neoverse N2 Updates the common and microarch json file to add counters available in the Arm Neoverse N2 chip, but should also apply to other ArmV8 and ArmV9 cpus. Specified in ArmV8 architecture reference manual https://developer.arm.com/documentation/ddi0487/gb/?lang=en Some of the counters added to armv8-common-and-microarch.json are specified in the ArmV9 architecture reference manual supplement (issue A.a): https://developer.arm.com/documentation/ddi0608/aa The additional ArmV9 counters are TRB_WRAP TRCEXTOUT0 TRCEXTOUT1 TRCEXTOUT2 TRCEXTOUT3 CTI_TRIGOUT4 CTI_TRIGOUT5 CTI_TRIGOUT6 CTI_TRIGOUT7 This patch also adds files in pmu-events/arch/arm64/arm/neoverse-n2 for perf list to output the counter names in categories. Counters on the Neoverse N2 are stated in its reference manual: https://developer.arm.com/documentation/102099/0000 Reviewed-by: John Garry Signed-off-by: Andrew Kilroy Cc: Alexander Shishkin Cc: Jiri Olsa Cc: Leo Yan Cc: Mark Rutland Cc: Mathieu Poirier Cc: Namhyung Kim Cc: Will Deacon Cc: linux-arm-kernel@lists.infradead.org Link: https://lore.kernel.org/r/20211210123706.7490-2-andrew.kilroy@arm.com Signed-off-by: Arnaldo Carvalho de Melo --- .../arch/arm64/arm/neoverse-n2/branch.json | 8 + .../arch/arm64/arm/neoverse-n2/bus.json | 20 ++ .../arch/arm64/arm/neoverse-n2/cache.json | 155 ++++++++++++++ .../arch/arm64/arm/neoverse-n2/exception.json | 47 +++++ .../arm64/arm/neoverse-n2/instruction.json | 143 +++++++++++++ .../arch/arm64/arm/neoverse-n2/memory.json | 38 ++++ .../arch/arm64/arm/neoverse-n2/other.json | 5 + .../arch/arm64/arm/neoverse-n2/pipeline.json | 23 ++ .../arch/arm64/arm/neoverse-n2/spe.json | 14 ++ .../arch/arm64/arm/neoverse-n2/trace.json | 29 +++ .../arm64/armv8-common-and-microarch.json | 198 ++++++++++++++++++ tools/perf/pmu-events/arch/arm64/mapfile.csv | 1 + 12 files changed, 681 insertions(+) create mode 100644 tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/branch.json create mode 100644 tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/bus.json create mode 100644 tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/cache.json create mode 100644 tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/exception.json create mode 100644 tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/instruction.json create mode 100644 tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/memory.json create mode 100644 tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/other.json create mode 100644 tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/pipeline.json create mode 100644 tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/spe.json create mode 100644 tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/trace.json diff --git a/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/branch.json b/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/branch.json new file mode 100644 index 000000000000..79f2016c53b0 --- /dev/null +++ b/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/branch.json @@ -0,0 +1,8 @@ +[ + { + "ArchStdEvent": "BR_MIS_PRED" + }, + { + "ArchStdEvent": "BR_PRED" + } +] diff --git a/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/bus.json b/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/bus.json new file mode 100644 index 000000000000..579c1c993d17 --- /dev/null +++ b/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/bus.json @@ -0,0 +1,20 @@ +[ + { + "ArchStdEvent": "CPU_CYCLES" + }, + { + "ArchStdEvent": "BUS_ACCESS" + }, + { + "ArchStdEvent": "BUS_CYCLES" + }, + { + "ArchStdEvent": "BUS_ACCESS_RD" + }, + { + "ArchStdEvent": "BUS_ACCESS_WR" + }, + { + "ArchStdEvent": "CNT_CYCLES" + } +] diff --git a/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/cache.json b/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/cache.json new file mode 100644 index 000000000000..0141f749bff3 --- /dev/null +++ b/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/cache.json @@ -0,0 +1,155 @@ +[ + { + "ArchStdEvent": "L1I_CACHE_REFILL" + }, + { + "ArchStdEvent": "L1I_TLB_REFILL" + }, + { + "ArchStdEvent": "L1D_CACHE_REFILL" + }, + { + "ArchStdEvent": "L1D_CACHE" + }, + { + "ArchStdEvent": "L1D_TLB_REFILL" + }, + { + "ArchStdEvent": "L1I_CACHE" + }, + { + "ArchStdEvent": "L1D_CACHE_WB" + }, + { + "ArchStdEvent": "L2D_CACHE" + }, + { + "ArchStdEvent": "L2D_CACHE_REFILL" + }, + { + "ArchStdEvent": "L2D_CACHE_WB" + }, + { + "ArchStdEvent": "L2D_CACHE_ALLOCATE" + }, + { + "ArchStdEvent": "L1D_TLB" + }, + { + "ArchStdEvent": "L1I_TLB" + }, + { + "ArchStdEvent": "L3D_CACHE_ALLOCATE" + }, + { + "ArchStdEvent": "L3D_CACHE_REFILL" + }, + { + "ArchStdEvent": "L3D_CACHE" + }, + { + "ArchStdEvent": "L2D_TLB_REFILL" + }, + { + "ArchStdEvent": "L2D_TLB" + }, + { + "ArchStdEvent": "DTLB_WALK" + }, + { + "ArchStdEvent": "ITLB_WALK" + }, + { + "ArchStdEvent": "LL_CACHE_RD" + }, + { + "ArchStdEvent": "LL_CACHE_MISS_RD" + }, + { + "ArchStdEvent": "L1D_CACHE_LMISS_RD" + }, + { + "ArchStdEvent": "L1D_CACHE_RD" + }, + { + "ArchStdEvent": "L1D_CACHE_WR" + }, + { + "ArchStdEvent": "L1D_CACHE_REFILL_RD" + }, + { + "ArchStdEvent": "L1D_CACHE_REFILL_WR" + }, + { + "ArchStdEvent": "L1D_CACHE_REFILL_INNER" + }, + { + "ArchStdEvent": "L1D_CACHE_REFILL_OUTER" + }, + { + "ArchStdEvent": "L1D_CACHE_WB_VICTIM" + }, + { + "ArchStdEvent": "L1D_CACHE_WB_CLEAN" + }, + { + "ArchStdEvent": "L1D_CACHE_INVAL" + }, + { + "ArchStdEvent": "L1D_TLB_REFILL_RD" + }, + { + "ArchStdEvent": "L1D_TLB_REFILL_WR" + }, + { + "ArchStdEvent": "L1D_TLB_RD" + }, + { + "ArchStdEvent": "L1D_TLB_WR" + }, + { + "ArchStdEvent": "L2D_CACHE_RD" + }, + { + "ArchStdEvent": "L2D_CACHE_WR" + }, + { + "ArchStdEvent": "L2D_CACHE_REFILL_RD" + }, + { + "ArchStdEvent": "L2D_CACHE_REFILL_WR" + }, + { + "ArchStdEvent": "L2D_CACHE_WB_VICTIM" + }, + { + "ArchStdEvent": "L2D_CACHE_WB_CLEAN" + }, + { + "ArchStdEvent": "L2D_CACHE_INVAL" + }, + { + "ArchStdEvent": "L2D_TLB_REFILL_RD" + }, + { + "ArchStdEvent": "L2D_TLB_REFILL_WR" + }, + { + "ArchStdEvent": "L2D_TLB_RD" + }, + { + "ArchStdEvent": "L2D_TLB_WR" + }, + { + "ArchStdEvent": "L3D_CACHE_RD" + }, + { + "ArchStdEvent": "L1I_CACHE_LMISS" + }, + { + "ArchStdEvent": "L2D_CACHE_LMISS_RD" + }, + { + "ArchStdEvent": "L3D_CACHE_LMISS_RD" + } +] diff --git a/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/exception.json b/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/exception.json new file mode 100644 index 000000000000..344a2d552ad5 --- /dev/null +++ b/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/exception.json @@ -0,0 +1,47 @@ +[ + { + "ArchStdEvent": "EXC_TAKEN" + }, + { + "ArchStdEvent": "MEMORY_ERROR" + }, + { + "ArchStdEvent": "EXC_UNDEF" + }, + { + "ArchStdEvent": "EXC_SVC" + }, + { + "ArchStdEvent": "EXC_PABORT" + }, + { + "ArchStdEvent": "EXC_DABORT" + }, + { + "ArchStdEvent": "EXC_IRQ" + }, + { + "ArchStdEvent": "EXC_FIQ" + }, + { + "ArchStdEvent": "EXC_SMC" + }, + { + "ArchStdEvent": "EXC_HVC" + }, + { + "ArchStdEvent": "EXC_TRAP_PABORT" + }, + { + "ArchStdEvent": "EXC_TRAP_DABORT" + }, + { + "ArchStdEvent": "EXC_TRAP_OTHER" + }, + { + "ArchStdEvent": "EXC_TRAP_IRQ" + }, + { + "ArchStdEvent": "EXC_TRAP_FIQ" + } +] diff --git a/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/instruction.json b/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/instruction.json new file mode 100644 index 000000000000..e57cd55937c6 --- /dev/null +++ b/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/instruction.json @@ -0,0 +1,143 @@ +[ + { + "ArchStdEvent": "SW_INCR" + }, + { + "ArchStdEvent": "INST_RETIRED" + }, + { + "ArchStdEvent": "EXC_RETURN" + }, + { + "ArchStdEvent": "CID_WRITE_RETIRED" + }, + { + "ArchStdEvent": "INST_SPEC" + }, + { + "ArchStdEvent": "TTBR_WRITE_RETIRED" + }, + { + "ArchStdEvent": "BR_RETIRED" + }, + { + "ArchStdEvent": "BR_MIS_PRED_RETIRED" + }, + { + "ArchStdEvent": "OP_RETIRED" + }, + { + "ArchStdEvent": "OP_SPEC" + }, + { + "ArchStdEvent": "LDREX_SPEC" + }, + { + "ArchStdEvent": "STREX_PASS_SPEC" + }, + { + "ArchStdEvent": "STREX_FAIL_SPEC" + }, + { + "ArchStdEvent": "STREX_SPEC" + }, + { + "ArchStdEvent": "LD_SPEC" + }, + { + "ArchStdEvent": "ST_SPEC" + }, + { + "ArchStdEvent": "DP_SPEC" + }, + { + "ArchStdEvent": "ASE_SPEC" + }, + { + "ArchStdEvent": "VFP_SPEC" + }, + { + "ArchStdEvent": "PC_WRITE_SPEC" + }, + { + "ArchStdEvent": "CRYPTO_SPEC" + }, + { + "ArchStdEvent": "BR_IMMED_SPEC" + }, + { + "ArchStdEvent": "BR_RETURN_SPEC" + }, + { + "ArchStdEvent": "BR_INDIRECT_SPEC" + }, + { + "ArchStdEvent": "ISB_SPEC" + }, + { + "ArchStdEvent": "DSB_SPEC" + }, + { + "ArchStdEvent": "DMB_SPEC" + }, + { + "ArchStdEvent": "RC_LD_SPEC" + }, + { + "ArchStdEvent": "RC_ST_SPEC" + }, + { + "ArchStdEvent": "ASE_INST_SPEC" + }, + { + "ArchStdEvent": "SVE_INST_SPEC" + }, + { + "ArchStdEvent": "FP_HP_SPEC" + }, + { + "ArchStdEvent": "FP_SP_SPEC" + }, + { + "ArchStdEvent": "FP_DP_SPEC" + }, + { + "ArchStdEvent": "SVE_PRED_SPEC" + }, + { + "ArchStdEvent": "SVE_PRED_EMPTY_SPEC" + }, + { + "ArchStdEvent": "SVE_PRED_FULL_SPEC" + }, + { + "ArchStdEvent": "SVE_PRED_PARTIAL_SPEC" + }, + { + "ArchStdEvent": "SVE_PRED_NOT_FULL_SPEC" + }, + { + "ArchStdEvent": "SVE_LDFF_SPEC" + }, + { + "ArchStdEvent": "SVE_LDFF_FAULT_SPEC" + }, + { + "ArchStdEvent": "FP_SCALE_OPS_SPEC" + }, + { + "ArchStdEvent": "FP_FIXED_OPS_SPEC" + }, + { + "ArchStdEvent": "ASE_SVE_INT8_SPEC" + }, + { + "ArchStdEvent": "ASE_SVE_INT16_SPEC" + }, + { + "ArchStdEvent": "ASE_SVE_INT32_SPEC" + }, + { + "ArchStdEvent": "ASE_SVE_INT64_SPEC" + } +] diff --git a/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/memory.json b/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/memory.json new file mode 100644 index 000000000000..e522113aeb96 --- /dev/null +++ b/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/memory.json @@ -0,0 +1,38 @@ +[ + { + "ArchStdEvent": "MEM_ACCESS" + }, + { + "ArchStdEvent": "MEM_ACCESS_RD" + }, + { + "ArchStdEvent": "MEM_ACCESS_WR" + }, + { + "ArchStdEvent": "UNALIGNED_LD_SPEC" + }, + { + "ArchStdEvent": "UNALIGNED_ST_SPEC" + }, + { + "ArchStdEvent": "UNALIGNED_LDST_SPEC" + }, + { + "ArchStdEvent": "LDST_ALIGN_LAT" + }, + { + "ArchStdEvent": "LD_ALIGN_LAT" + }, + { + "ArchStdEvent": "ST_ALIGN_LAT" + }, + { + "ArchStdEvent": "MEM_ACCESS_CHECKED" + }, + { + "ArchStdEvent": "MEM_ACCESS_CHECKED_RD" + }, + { + "ArchStdEvent": "MEM_ACCESS_CHECKED_WR" + } +] diff --git a/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/other.json b/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/other.json new file mode 100644 index 000000000000..20d8365756c5 --- /dev/null +++ b/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/other.json @@ -0,0 +1,5 @@ +[ + { + "ArchStdEvent": "REMOTE_ACCESS" + } +] diff --git a/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/pipeline.json b/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/pipeline.json new file mode 100644 index 000000000000..f9fae15f7555 --- /dev/null +++ b/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/pipeline.json @@ -0,0 +1,23 @@ +[ + { + "ArchStdEvent": "STALL_FRONTEND" + }, + { + "ArchStdEvent": "STALL_BACKEND" + }, + { + "ArchStdEvent": "STALL" + }, + { + "ArchStdEvent": "STALL_SLOT_BACKEND" + }, + { + "ArchStdEvent": "STALL_SLOT_FRONTEND" + }, + { + "ArchStdEvent": "STALL_SLOT" + }, + { + "ArchStdEvent": "STALL_BACKEND_MEM" + } +] diff --git a/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/spe.json b/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/spe.json new file mode 100644 index 000000000000..20f2165c85fe --- /dev/null +++ b/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/spe.json @@ -0,0 +1,14 @@ +[ + { + "ArchStdEvent": "SAMPLE_POP" + }, + { + "ArchStdEvent": "SAMPLE_FEED" + }, + { + "ArchStdEvent": "SAMPLE_FILTRATE" + }, + { + "ArchStdEvent": "SAMPLE_COLLISION" + } +] diff --git a/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/trace.json b/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/trace.json new file mode 100644 index 000000000000..3116135c59e2 --- /dev/null +++ b/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/trace.json @@ -0,0 +1,29 @@ +[ + { + "ArchStdEvent": "TRB_WRAP" + }, + { + "ArchStdEvent": "TRCEXTOUT0" + }, + { + "ArchStdEvent": "TRCEXTOUT1" + }, + { + "ArchStdEvent": "TRCEXTOUT2" + }, + { + "ArchStdEvent": "TRCEXTOUT3" + }, + { + "ArchStdEvent": "CTI_TRIGOUT4" + }, + { + "ArchStdEvent": "CTI_TRIGOUT5" + }, + { + "ArchStdEvent": "CTI_TRIGOUT6" + }, + { + "ArchStdEvent": "CTI_TRIGOUT7" + } +] diff --git a/tools/perf/pmu-events/arch/arm64/armv8-common-and-microarch.json b/tools/perf/pmu-events/arch/arm64/armv8-common-and-microarch.json index 423767510aff..80d7a70829a0 100644 --- a/tools/perf/pmu-events/arch/arm64/armv8-common-and-microarch.json +++ b/tools/perf/pmu-events/arch/arm64/armv8-common-and-microarch.json @@ -299,6 +299,30 @@ "EventName": "STALL_SLOT", "BriefDescription": "No operation sent for execution on a slot" }, + { + "PublicDescription": "Sample Population", + "EventCode": "0x4000", + "EventName": "SAMPLE_POP", + "BriefDescription": "Sample Population" + }, + { + "PublicDescription": "Sample Taken", + "EventCode": "0x4001", + "EventName": "SAMPLE_FEED", + "BriefDescription": "Sample Taken" + }, + { + "PublicDescription": "Sample Taken and not removed by filtering", + "EventCode": "0x4002", + "EventName": "SAMPLE_FILTRATE", + "BriefDescription": "Sample Taken and not removed by filtering" + }, + { + "PublicDescription": "Sample collided with previous sample", + "EventCode": "0x4003", + "EventName": "SAMPLE_COLLISION", + "BriefDescription": "Sample collided with previous sample" + }, { "PublicDescription": "Constant frequency cycles. The counter increments at a constant frequency equal to the rate of increment of the system counter, CNTPCT_EL0.", "EventCode": "0x4004", @@ -329,6 +353,96 @@ "EventName": "L3D_CACHE_LMISS_RD", "BriefDescription": "Level 3 data cache long-latency read miss" }, + { + "PublicDescription": "Trace buffer current write pointer wrapped", + "EventCode": "0x400C", + "EventName": "TRB_WRAP", + "BriefDescription": "Trace buffer current write pointer wrapped" + }, + { + "PublicDescription": "PE Trace Unit external output 0", + "EventCode": "0x4010", + "EventName": "TRCEXTOUT0", + "BriefDescription": "PE Trace Unit external output 0" + }, + { + "PublicDescription": "PE Trace Unit external output 1", + "EventCode": "0x4011", + "EventName": "TRCEXTOUT1", + "BriefDescription": "PE Trace Unit external output 1" + }, + { + "PublicDescription": "PE Trace Unit external output 2", + "EventCode": "0x4012", + "EventName": "TRCEXTOUT2", + "BriefDescription": "PE Trace Unit external output 2" + }, + { + "PublicDescription": "PE Trace Unit external output 3", + "EventCode": "0x4013", + "EventName": "TRCEXTOUT3", + "BriefDescription": "PE Trace Unit external output 3" + }, + { + "PublicDescription": "Cross-trigger Interface output trigger 4", + "EventCode": "0x4018", + "EventName": "CTI_TRIGOUT4", + "BriefDescription": "Cross-trigger Interface output trigger 4" + }, + { + "PublicDescription": "Cross-trigger Interface output trigger 5 ", + "EventCode": "0x4019", + "EventName": "CTI_TRIGOUT5", + "BriefDescription": "Cross-trigger Interface output trigger 5 " + }, + { + "PublicDescription": "Cross-trigger Interface output trigger 6", + "EventCode": "0x401A", + "EventName": "CTI_TRIGOUT6", + "BriefDescription": "Cross-trigger Interface output trigger 6" + }, + { + "PublicDescription": "Cross-trigger Interface output trigger 7", + "EventCode": "0x401B", + "EventName": "CTI_TRIGOUT7", + "BriefDescription": "Cross-trigger Interface output trigger 7" + }, + { + "PublicDescription": "Access with additional latency from alignment", + "EventCode": "0x4020", + "EventName": "LDST_ALIGN_LAT", + "BriefDescription": "Access with additional latency from alignment" + }, + { + "PublicDescription": "Load with additional latency from alignment", + "EventCode": "0x4021", + "EventName": "LD_ALIGN_LAT", + "BriefDescription": "Load with additional latency from alignment" + }, + { + "PublicDescription": "Store with additional latency from alignment", + "EventCode": "0x4022", + "EventName": "ST_ALIGN_LAT", + "BriefDescription": "Store with additional latency from alignment" + }, + { + "PublicDescription": "Checked data memory access", + "EventCode": "0x4024", + "EventName": "MEM_ACCESS_CHECKED", + "BriefDescription": "Checked data memory access" + }, + { + "PublicDescription": "Checked data memory access, read", + "EventCode": "0x4025", + "EventName": "MEM_ACCESS_CHECKED_RD", + "BriefDescription": "Checked data memory access, read" + }, + { + "PublicDescription": "Checked data memory access, write", + "EventCode": "0x4026", + "EventName": "MEM_ACCESS_CHECKED_WR", + "BriefDescription": "Checked data memory access, write" + }, { "PublicDescription": "SIMD Instruction architecturally executed.", "EventCode": "0x8000", @@ -341,6 +455,18 @@ "EventName": "SVE_INST_RETIRED", "BriefDescription": "Instruction architecturally executed, SVE." }, + { + "PublicDescription": "ASE operations speculatively executed", + "EventCode": "0x8005", + "EventName": "ASE_INST_SPEC", + "BriefDescription": "ASE operations speculatively executed" + }, + { + "PublicDescription": "SVE operations speculatively executed", + "EventCode": "0x8006", + "EventName": "SVE_INST_SPEC", + "BriefDescription": "SVE operations speculatively executed" + }, { "PublicDescription": "Microarchitectural operation, Operations speculatively executed.", "EventCode": "0x8008", @@ -359,6 +485,24 @@ "EventName": "FP_SPEC", "BriefDescription": "Floating-point Operations speculatively executed." }, + { + "PublicDescription": "Floating-point half-precision operations speculatively executed", + "EventCode": "0x8014", + "EventName": "FP_HP_SPEC", + "BriefDescription": "Floating-point half-precision operations speculatively executed" + }, + { + "PublicDescription": "Floating-point single-precision operations speculatively executed", + "EventCode": "0x8018", + "EventName": "FP_SP_SPEC", + "BriefDescription": "Floating-point single-precision operations speculatively executed" + }, + { + "PublicDescription": "Floating-point double-precision operations speculatively executed", + "EventCode": "0x801C", + "EventName": "FP_DP_SPEC", + "BriefDescription": "Floating-point double-precision operations speculatively executed" + }, { "PublicDescription": "Floating-point FMA Operations speculatively executed.", "EventCode": "0x8028", @@ -389,6 +533,30 @@ "EventName": "SVE_PRED_SPEC", "BriefDescription": "SVE predicated Operations speculatively executed." }, + { + "PublicDescription": "SVE predicated operations with no active predicates speculatively executed", + "EventCode": "0x8075", + "EventName": "SVE_PRED_EMPTY_SPEC", + "BriefDescription": "SVE predicated operations with no active predicates speculatively executed" + }, + { + "PublicDescription": "SVE predicated operations speculatively executed with all active predicates", + "EventCode": "0x8076", + "EventName": "SVE_PRED_FULL_SPEC", + "BriefDescription": "SVE predicated operations speculatively executed with all active predicates" + }, + { + "PublicDescription": "SVE predicated operations speculatively executed with partially active predicates", + "EventCode": "0x8077", + "EventName": "SVE_PRED_PARTIAL_SPEC", + "BriefDescription": "SVE predicated operations speculatively executed with partially active predicates" + }, + { + "PublicDescription": "SVE predicated operations with empty or partially active predicates", + "EventCode": "0x8079", + "EventName": "SVE_PRED_NOT_FULL_SPEC", + "BriefDescription": "SVE predicated operations with empty or partially active predicates" + }, { "PublicDescription": "SVE MOVPRFX Operations speculatively executed.", "EventCode": "0x807C", @@ -497,6 +665,12 @@ "EventName": "SVE_LDFF_SPEC", "BriefDescription": "SVE First-fault load Operations speculatively executed." }, + { + "PublicDescription": "SVE first-fault load operations speculatively executed which set FFR bit to 0", + "EventCode": "0x80BD", + "EventName": "SVE_LDFF_FAULT_SPEC", + "BriefDescription": "SVE first-fault load operations speculatively executed which set FFR bit to 0" + }, { "PublicDescription": "Scalable floating-point element Operations speculatively executed.", "EventCode": "0x80C0", @@ -544,5 +718,29 @@ "EventCode": "0x80C7", "EventName": "FP_DP_FIXED_OPS_SPEC", "BriefDescription": "Non-scalable double-precision floating-point element Operations speculatively executed." + }, + { + "PublicDescription": "Advanced SIMD and SVE 8-bit integer operations speculatively executed", + "EventCode": "0x80E3", + "EventName": "ASE_SVE_INT8_SPEC", + "BriefDescription": "Advanced SIMD and SVE 8-bit integer operations speculatively executed" + }, + { + "PublicDescription": "Advanced SIMD and SVE 16-bit integer operations speculatively executed", + "EventCode": "0x80E7", + "EventName": "ASE_SVE_INT16_SPEC", + "BriefDescription": "Advanced SIMD and SVE 16-bit integer operations speculatively executed" + }, + { + "PublicDescription": "Advanced SIMD and SVE 32-bit integer operations speculatively executed", + "EventCode": "0x80EB", + "EventName": "ASE_SVE_INT32_SPEC", + "BriefDescription": "Advanced SIMD and SVE 32-bit integer operations speculatively executed" + }, + { + "PublicDescription": "Advanced SIMD and SVE 64-bit integer operations speculatively executed", + "EventCode": "0x80EF", + "EventName": "ASE_SVE_INT64_SPEC", + "BriefDescription": "Advanced SIMD and SVE 64-bit integer operations speculatively executed" } ] diff --git a/tools/perf/pmu-events/arch/arm64/mapfile.csv b/tools/perf/pmu-events/arch/arm64/mapfile.csv index 31d8b57ca9bb..b899db48c12a 100644 --- a/tools/perf/pmu-events/arch/arm64/mapfile.csv +++ b/tools/perf/pmu-events/arch/arm64/mapfile.csv @@ -19,6 +19,7 @@ 0x00000000410fd0b0,v1,arm/cortex-a76-n1,core 0x00000000410fd0c0,v1,arm/cortex-a76-n1,core 0x00000000410fd400,v1,arm/neoverse-v1,core +0x00000000410fd490,v1,arm/neoverse-n2,core 0x00000000420f5160,v1,cavium/thunderx2,core 0x00000000430f0af0,v1,cavium/thunderx2,core 0x00000000460f0010,v1,fujitsu/a64fx,core From 6732f10b11c63112ce34a064b247c00e090b9514 Mon Sep 17 00:00:00 2001 From: Andrew Kilroy Date: Fri, 10 Dec 2021 12:37:05 +0000 Subject: [PATCH 071/493] perf vendor events: Rename arm64 arch std event files A previous commit adds pmu events into the files armv8-common-and-microarch.json armv8-recommended.json that are actually specified in an armv9 reference supplement, not armv8. As such, naming the files with the armv8 prefix seems artificial. This patch renames the files to reflect that these two files are for arch std events regardless of whether they are defined in armv8 or armv9. Reviewed-by: John Garry Signed-off-by: Andrew Kilroy Cc: Alexander Shishkin Cc: Jiri Olsa Cc: Leo Yan Cc: Mark Rutland Cc: Mathieu Poirier Cc: Namhyung Kim Cc: Will Deacon Cc: linux-arm-kernel@lists.infradead.org Link: https://lore.kernel.org/r/20211210123706.7490-3-andrew.kilroy@arm.com Signed-off-by: Arnaldo Carvalho de Melo --- ...{armv8-common-and-microarch.json => common-and-microarch.json} | 0 .../arch/arm64/{armv8-recommended.json => recommended.json} | 0 2 files changed, 0 insertions(+), 0 deletions(-) rename tools/perf/pmu-events/arch/arm64/{armv8-common-and-microarch.json => common-and-microarch.json} (100%) rename tools/perf/pmu-events/arch/arm64/{armv8-recommended.json => recommended.json} (100%) diff --git a/tools/perf/pmu-events/arch/arm64/armv8-common-and-microarch.json b/tools/perf/pmu-events/arch/arm64/common-and-microarch.json similarity index 100% rename from tools/perf/pmu-events/arch/arm64/armv8-common-and-microarch.json rename to tools/perf/pmu-events/arch/arm64/common-and-microarch.json diff --git a/tools/perf/pmu-events/arch/arm64/armv8-recommended.json b/tools/perf/pmu-events/arch/arm64/recommended.json similarity index 100% rename from tools/perf/pmu-events/arch/arm64/armv8-recommended.json rename to tools/perf/pmu-events/arch/arm64/recommended.json From 7cc9680c4be7c1da7a3570711f01273b781b936b Mon Sep 17 00:00:00 2001 From: James Clark Date: Wed, 8 Dec 2021 11:54:35 +0000 Subject: [PATCH 072/493] perf cs-etm: Remove duplicate and incorrect aux size checks There are two checks, one is for size when running without admin, but this one is covered by the driver and reported on in more detail here (builtin-record.c): pr_err("Permission error mapping pages.\n" "Consider increasing " "/proc/sys/kernel/perf_event_mlock_kb,\n" "or try again with a smaller value of -m/--mmap_pages.\n" "(current value: %u,%u)\n", This had the effect of artificially limiting the aux buffer size to a value smaller than what was allowed because perf_event_mlock_kb wasn't taken into account. The second is to check for a power of two, but this is covered here (evlist.c): pr_info("rounding mmap pages size to %s (%lu pages)\n", buf, pages); Reviewed-by: Leo Yan Signed-off-by: James Clark Cc: Alexander Shishkin Cc: Jiri Olsa Cc: John Garry Cc: Mark Rutland Cc: Mathieu Poirier Cc: Mike Leach Cc: Namhyung Kim Cc: Suzuki Poulouse Cc: Will Deacon Cc: coresight@lists.linaro.org Cc: linux-arm-kernel@lists.infradead.org Link: https://lore.kernel.org/r/20211208115435.610101-1-james.clark@arm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/arch/arm/util/cs-etm.c | 19 ------------------- 1 file changed, 19 deletions(-) diff --git a/tools/perf/arch/arm/util/cs-etm.c b/tools/perf/arch/arm/util/cs-etm.c index 293a23bf8be3..8a3d54a86c9c 100644 --- a/tools/perf/arch/arm/util/cs-etm.c +++ b/tools/perf/arch/arm/util/cs-etm.c @@ -407,25 +407,6 @@ static int cs_etm_recording_options(struct auxtrace_record *itr, } - /* Validate auxtrace_mmap_pages provided by user */ - if (opts->auxtrace_mmap_pages) { - unsigned int max_page = (KiB(128) / page_size); - size_t sz = opts->auxtrace_mmap_pages * (size_t)page_size; - - if (!privileged && - opts->auxtrace_mmap_pages > max_page) { - opts->auxtrace_mmap_pages = max_page; - pr_err("auxtrace too big, truncating to %d\n", - max_page); - } - - if (!is_power_of_2(sz)) { - pr_err("Invalid mmap size for %s: must be a power of 2\n", - CORESIGHT_ETM_PMU_NAME); - return -EINVAL; - } - } - if (opts->auxtrace_snapshot_mode) pr_debug2("%s snapshot size: %zu\n", CORESIGHT_ETM_PMU_NAME, opts->auxtrace_snapshot_size); From 8acf3793eae4d809658b1ebeed68d818d6d38142 Mon Sep 17 00:00:00 2001 From: Miaoqian Lin Date: Sun, 12 Dec 2021 13:56:09 +0000 Subject: [PATCH 073/493] perf bpf-loader: Use IS_ERR_OR_NULL() to clean code and fix check Use IS_ERR_OR_NULL() to make the code cleaner. Also if the priv is NULL, it's improper to call PTR_ERR(priv). Signed-off-by: Miaoqian Lin Cc: Alexander Shishkin Cc: Alexei Starovoitov Cc: Andrii Nakryiko Cc: Daniel Borkmann Cc: Jiri Olsa Cc: John Fastabend Cc: KP Singh Cc: Mark Rutland Cc: Martin KaFai Lau Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Song Liu Cc: Yonghong Song Cc: bpf@vger.kernel.org Cc: netdev@vger.kernel.org Cc: unlisted-recipients Link: http://lore.kernel.org/lkml/20211212135613.20000-1-linmq006@gmail.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/bpf-loader.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/tools/perf/util/bpf-loader.c b/tools/perf/util/bpf-loader.c index fbb3c4057c30..22662fc85cc9 100644 --- a/tools/perf/util/bpf-loader.c +++ b/tools/perf/util/bpf-loader.c @@ -421,7 +421,7 @@ preproc_gen_prologue(struct bpf_program *prog, int n, size_t prologue_cnt = 0; int i, err; - if (IS_ERR(priv) || !priv || priv->is_tp) + if (IS_ERR_OR_NULL(priv) || priv->is_tp) goto errout; pev = &priv->pev; @@ -570,7 +570,7 @@ static int hook_load_preprocessor(struct bpf_program *prog) bool need_prologue = false; int err, i; - if (IS_ERR(priv) || !priv) { + if (IS_ERR_OR_NULL(priv)) { pr_debug("Internal error when hook preprocessor\n"); return -BPF_LOADER_ERRNO__INTERNAL; } @@ -642,8 +642,11 @@ int bpf__probe(struct bpf_object *obj) goto out; priv = bpf_program__priv(prog); - if (IS_ERR(priv) || !priv) { - err = PTR_ERR(priv); + if (IS_ERR_OR_NULL(priv)) { + if (!priv) + err = -BPF_LOADER_ERRNO__INTERNAL; + else + err = PTR_ERR(priv); goto out; } @@ -693,7 +696,7 @@ int bpf__unprobe(struct bpf_object *obj) struct bpf_prog_priv *priv = bpf_program__priv(prog); int i; - if (IS_ERR(priv) || !priv || priv->is_tp) + if (IS_ERR_OR_NULL(priv) || priv->is_tp) continue; for (i = 0; i < priv->pev.ntevs; i++) { @@ -751,7 +754,7 @@ int bpf__foreach_event(struct bpf_object *obj, struct perf_probe_event *pev; int i, fd; - if (IS_ERR(priv) || !priv) { + if (IS_ERR_OR_NULL(priv)) { pr_debug("bpf: failed to get private field\n"); return -BPF_LOADER_ERRNO__INTERNAL; } From 017f7d1fac1c40eba6d97490a75bc1999914ef75 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Sun, 12 Dec 2021 22:21:22 +0000 Subject: [PATCH 074/493] libperf tests: Fix a spelling mistake "Runnnig" -> "Running" There is a spelling mistake in a __T_VERBOSE message. Fix it. Signed-off-by: Colin Ian King Cc: Alexander Shishkin Cc: Jiri Olsa Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: kernel-janitors@vger.kernel.org Link: http://lore.kernel.org/lkml/20211212222122.478537-1-colin.i.king@gmail.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/lib/perf/tests/test-evlist.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/lib/perf/tests/test-evlist.c b/tools/lib/perf/tests/test-evlist.c index 520a78267743..e7afff12c35a 100644 --- a/tools/lib/perf/tests/test-evlist.c +++ b/tools/lib/perf/tests/test-evlist.c @@ -535,7 +535,7 @@ static int test_stat_multiplexing(void) (double)counts[i].run / (double)counts[i].ena * 100.0, counts[i].run, counts[i].ena); } else if (scaled == -1) { - __T_VERBOSE("\t Not Runnnig\n"); + __T_VERBOSE("\t Not Running\n"); } else { __T_VERBOSE("\t Not Scaling\n"); } From 5d28a17c1c0e9dbd299015f91800aff3466ebedf Mon Sep 17 00:00:00 2001 From: Leo Yan Date: Sun, 12 Dec 2021 21:47:20 +0800 Subject: [PATCH 075/493] perf namespaces: Add helper nsinfo__is_in_root_namespace() Refactors code for gathering PID infos, it creates the function nsinfo__get_nspid() to parse process 'status' node in folder '/proc'. Base on the refactoring, this patch introduces a new helper nsinfo__is_in_root_namespace(), it returns true when the caller runs in the root PID namespace. Signed-off-by: Leo Yan Cc: Alexander Shishkin Cc: Alexei Starovoitov Cc: Andrii Nakryiko Cc: Daniel Borkmann Cc: Jin Yao Cc: Jiri Olsa Cc: John Fastabend Cc: John Garry Cc: KP Singh Cc: Mark Rutland Cc: Martin KaFai Lau Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Song Liu Cc: Yonatan Goldschmidt Cc: Yonghong Song Cc: bpf@vger.kernel.org Cc: netdev@vger.kernel.org Link: http://lore.kernel.org/lkml/20211212134721.1721245-2-leo.yan@linaro.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/namespaces.c | 76 ++++++++++++++++++++++-------------- tools/perf/util/namespaces.h | 2 + 2 files changed, 48 insertions(+), 30 deletions(-) diff --git a/tools/perf/util/namespaces.c b/tools/perf/util/namespaces.c index 608b20c72a5c..48aa3217300b 100644 --- a/tools/perf/util/namespaces.c +++ b/tools/perf/util/namespaces.c @@ -60,17 +60,49 @@ void namespaces__free(struct namespaces *namespaces) free(namespaces); } +static int nsinfo__get_nspid(struct nsinfo *nsi, const char *path) +{ + FILE *f = NULL; + char *statln = NULL; + size_t linesz = 0; + char *nspid; + + f = fopen(path, "r"); + if (f == NULL) + return -1; + + while (getline(&statln, &linesz, f) != -1) { + /* Use tgid if CONFIG_PID_NS is not defined. */ + if (strstr(statln, "Tgid:") != NULL) { + nsi->tgid = (pid_t)strtol(strrchr(statln, '\t'), + NULL, 10); + nsi->nstgid = nsi->tgid; + } + + if (strstr(statln, "NStgid:") != NULL) { + nspid = strrchr(statln, '\t'); + nsi->nstgid = (pid_t)strtol(nspid, NULL, 10); + /* + * If innermost tgid is not the first, process is in a different + * PID namespace. + */ + nsi->in_pidns = (statln + sizeof("NStgid:") - 1) != nspid; + break; + } + } + + fclose(f); + free(statln); + return 0; +} + int nsinfo__init(struct nsinfo *nsi) { char oldns[PATH_MAX]; char spath[PATH_MAX]; char *newns = NULL; - char *statln = NULL; - char *nspid; struct stat old_stat; struct stat new_stat; - FILE *f = NULL; - size_t linesz = 0; int rv = -1; if (snprintf(oldns, PATH_MAX, "/proc/self/ns/mnt") >= PATH_MAX) @@ -100,34 +132,9 @@ int nsinfo__init(struct nsinfo *nsi) if (snprintf(spath, PATH_MAX, "/proc/%d/status", nsi->pid) >= PATH_MAX) goto out; - f = fopen(spath, "r"); - if (f == NULL) - goto out; - - while (getline(&statln, &linesz, f) != -1) { - /* Use tgid if CONFIG_PID_NS is not defined. */ - if (strstr(statln, "Tgid:") != NULL) { - nsi->tgid = (pid_t)strtol(strrchr(statln, '\t'), - NULL, 10); - nsi->nstgid = nsi->tgid; - } - - if (strstr(statln, "NStgid:") != NULL) { - nspid = strrchr(statln, '\t'); - nsi->nstgid = (pid_t)strtol(nspid, NULL, 10); - /* If innermost tgid is not the first, process is in a different - * PID namespace. - */ - nsi->in_pidns = (statln + sizeof("NStgid:") - 1) != nspid; - break; - } - } - rv = 0; + rv = nsinfo__get_nspid(nsi, spath); out: - if (f != NULL) - (void) fclose(f); - free(statln); free(newns); return rv; } @@ -299,3 +306,12 @@ int nsinfo__stat(const char *filename, struct stat *st, struct nsinfo *nsi) return ret; } + +bool nsinfo__is_in_root_namespace(void) +{ + struct nsinfo nsi; + + memset(&nsi, 0x0, sizeof(nsi)); + nsinfo__get_nspid(&nsi, "/proc/self/status"); + return !nsi.in_pidns; +} diff --git a/tools/perf/util/namespaces.h b/tools/perf/util/namespaces.h index ad9775db7b9c..9ceea9643507 100644 --- a/tools/perf/util/namespaces.h +++ b/tools/perf/util/namespaces.h @@ -59,6 +59,8 @@ void nsinfo__mountns_exit(struct nscookie *nc); char *nsinfo__realpath(const char *path, struct nsinfo *nsi); int nsinfo__stat(const char *filename, struct stat *st, struct nsinfo *nsi); +bool nsinfo__is_in_root_namespace(void); + static inline void __nsinfo__zput(struct nsinfo **nsip) { if (nsip) { From d3b58af9a8276c24a2aa80a059d87d99f5216d3b Mon Sep 17 00:00:00 2001 From: German Gomez Date: Tue, 7 Dec 2021 18:06:51 +0000 Subject: [PATCH 076/493] perf arm64: Rename perf_event_arm_regs for ARM64 registers The registers for ARM and ARM64 are enumerated using two enums that have the same name. In order to be able to import both headers, the name of one can be replaced using the C preprocessor like so: #define perf_event_arm_regs perf_event_arm64_regs #include #undef perf_event_arm_regs This patch updates all imports of ARM64's perf_regs.h in order to prevent the naming collision. Reviewed-by: Athira Jajeev Signed-off-by: German Gomez Tested-by: Athira Jajeev Cc: Alexander Shishkin Cc: Jiri Olsa Cc: John Garry Cc: Leo Yan Cc: Mark Rutland Cc: Mathieu Poirier Cc: Namhyung Kim Cc: Will Deacon Cc: linux-arm-kernel@lists.infradead.org Cc: linux-csky@vger.kernel.org Cc: linux-riscv@lists.infradead.org Link: https://lore.kernel.org/r/20211207180653.1147374-3-german.gomez@arm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/arch/arm64/include/perf_regs.h | 2 ++ tools/perf/util/libunwind/arm64.c | 2 ++ 2 files changed, 4 insertions(+) diff --git a/tools/perf/arch/arm64/include/perf_regs.h b/tools/perf/arch/arm64/include/perf_regs.h index fa3e07459f76..1f0d78b9f23b 100644 --- a/tools/perf/arch/arm64/include/perf_regs.h +++ b/tools/perf/arch/arm64/include/perf_regs.h @@ -4,7 +4,9 @@ #include #include +#define perf_event_arm_regs perf_event_arm64_regs #include +#undef perf_event_arm_regs void perf_regs_load(u64 *regs); diff --git a/tools/perf/util/libunwind/arm64.c b/tools/perf/util/libunwind/arm64.c index c397be0c2e32..15f60fd09424 100644 --- a/tools/perf/util/libunwind/arm64.c +++ b/tools/perf/util/libunwind/arm64.c @@ -23,7 +23,9 @@ #include "unwind.h" #include "libunwind-aarch64.h" +#define perf_event_arm_regs perf_event_arm64_regs #include <../../../../arch/arm64/include/uapi/asm/perf_regs.h> +#undef perf_event_arm_regs #include "../../arch/arm64/util/unwind-libunwind.c" /* NO_LIBUNWIND_DEBUG_FRAME is a feature flag for local libunwind, From 83869019c74cc2d01c96a3be2463a4eebe362224 Mon Sep 17 00:00:00 2001 From: German Gomez Date: Tue, 7 Dec 2021 18:06:52 +0000 Subject: [PATCH 077/493] perf arch: Support register names from all archs When reading a perf.data file with register values, there is a mismatch between the names and the values of the registers because the tool is built using only the register names from the local architecture. Reading a perf.data file that was recorded on ARM64, gives the following erroneous output on an X86 machine: # perf report -i perf_arm64.data -D [...] 24661932634451 0x698 [0x21d0]: PERF_RECORD_SAMPLE(IP, 0x1): 43239/43239: 0xffffc5be8f100f98 period: 1 addr: 0 ... user regs: mask 0x1ffffffff ABI 64-bit .... AX 0x0000ffffd1515817 .... BX 0x0000ffffd1515480 .... CX 0x0000aaaadabf6c80 .... DX 0x000000000000002e .... SI 0x0000000040100401 .... DI 0x0040600200000080 .... BP 0x0000ffffd1510e10 .... SP 0x0000000000000000 .... IP 0x00000000000000dd .... FLAGS 0x0000ffffd1510cd0 .... CS 0x0000000000000000 .... SS 0x0000000000000030 .... DS 0x0000ffffa569a208 .... ES 0x0000000000000000 .... FS 0x0000000000000000 .... GS 0x0000000000000000 .... R8 0x0000aaaad3de9650 .... R9 0x0000ffffa57397f0 .... R10 0x0000000000000001 .... R11 0x0000ffffa57fd000 .... R12 0x0000ffffd1515817 .... R13 0x0000ffffd1515480 .... R14 0x0000aaaadabf6c80 .... R15 0x0000000000000000 .... unknown 0x0000000000000001 .... unknown 0x0000000000000000 .... unknown 0x0000000000000000 .... unknown 0x0000000000000000 .... unknown 0x0000000000000000 .... unknown 0x0000ffffd1510d90 .... unknown 0x0000ffffa5739b90 .... unknown 0x0000ffffd1510d80 .... XMM0 0x0000ffffa57392c8 ... thread: perf-exec:43239 ...... dso: [kernel.kallsyms] As can be seen, the register names correspond to X86 registers, even though the perf.data file was recorded on an ARM64 system. After this patch, the output of the command displays the correct register names: # perf report -i perf_arm64.data -D [...] 24661932634451 0x698 [0x21d0]: PERF_RECORD_SAMPLE(IP, 0x1): 43239/43239: 0xffffc5be8f100f98 period: 1 addr: 0 ... user regs: mask 0x1ffffffff ABI 64-bit .... x0 0x0000ffffd1515817 .... x1 0x0000ffffd1515480 .... x2 0x0000aaaadabf6c80 .... x3 0x000000000000002e .... x4 0x0000000040100401 .... x5 0x0040600200000080 .... x6 0x0000ffffd1510e10 .... x7 0x0000000000000000 .... x8 0x00000000000000dd .... x9 0x0000ffffd1510cd0 .... x10 0x0000000000000000 .... x11 0x0000000000000030 .... x12 0x0000ffffa569a208 .... x13 0x0000000000000000 .... x14 0x0000000000000000 .... x15 0x0000000000000000 .... x16 0x0000aaaad3de9650 .... x17 0x0000ffffa57397f0 .... x18 0x0000000000000001 .... x19 0x0000ffffa57fd000 .... x20 0x0000ffffd1515817 .... x21 0x0000ffffd1515480 .... x22 0x0000aaaadabf6c80 .... x23 0x0000000000000000 .... x24 0x0000000000000001 .... x25 0x0000000000000000 .... x26 0x0000000000000000 .... x27 0x0000000000000000 .... x28 0x0000000000000000 .... x29 0x0000ffffd1510d90 .... lr 0x0000ffffa5739b90 .... sp 0x0000ffffd1510d80 .... pc 0x0000ffffa57392c8 ... thread: perf-exec:43239 ...... dso: [kernel.kallsyms] Tester comments: Athira reports: "Looks good to me. Tested this patchset in powerpc by capturing regs in powerpc and doing perf report to read the data from x86." Reported-by: Alexandre Truong Reviewed-by: Athira Jajeev Signed-off-by: German Gomez Tested-by: Athira Jajeev Cc: Alexander Shishkin Cc: Jiri Olsa Cc: John Garry Cc: Leo Yan Cc: Mark Rutland Cc: Mathieu Poirier Cc: Namhyung Kim Cc: Will Deacon Cc: linux-arm-kernel@lists.infradead.org Cc: linux-csky@vger.kernel.org Cc: linux-riscv@lists.infradead.org Link: https://lore.kernel.org/r/20211207180653.1147374-4-german.gomez@arm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/arch/arm/include/perf_regs.h | 42 -- tools/perf/arch/arm64/include/perf_regs.h | 76 -- tools/perf/arch/csky/include/perf_regs.h | 82 --- tools/perf/arch/mips/include/perf_regs.h | 69 -- tools/perf/arch/powerpc/include/perf_regs.h | 66 -- tools/perf/arch/riscv/include/perf_regs.h | 74 -- tools/perf/arch/s390/include/perf_regs.h | 78 -- tools/perf/arch/x86/include/perf_regs.h | 82 --- tools/perf/builtin-script.c | 18 +- tools/perf/util/perf_regs.c | 666 ++++++++++++++++++ tools/perf/util/perf_regs.h | 10 +- .../scripting-engines/trace-event-python.c | 10 +- tools/perf/util/session.c | 25 +- 13 files changed, 697 insertions(+), 601 deletions(-) diff --git a/tools/perf/arch/arm/include/perf_regs.h b/tools/perf/arch/arm/include/perf_regs.h index 4085419283d0..99a06550e25d 100644 --- a/tools/perf/arch/arm/include/perf_regs.h +++ b/tools/perf/arch/arm/include/perf_regs.h @@ -15,46 +15,4 @@ void perf_regs_load(u64 *regs); #define PERF_REG_IP PERF_REG_ARM_PC #define PERF_REG_SP PERF_REG_ARM_SP -static inline const char *__perf_reg_name(int id) -{ - switch (id) { - case PERF_REG_ARM_R0: - return "r0"; - case PERF_REG_ARM_R1: - return "r1"; - case PERF_REG_ARM_R2: - return "r2"; - case PERF_REG_ARM_R3: - return "r3"; - case PERF_REG_ARM_R4: - return "r4"; - case PERF_REG_ARM_R5: - return "r5"; - case PERF_REG_ARM_R6: - return "r6"; - case PERF_REG_ARM_R7: - return "r7"; - case PERF_REG_ARM_R8: - return "r8"; - case PERF_REG_ARM_R9: - return "r9"; - case PERF_REG_ARM_R10: - return "r10"; - case PERF_REG_ARM_FP: - return "fp"; - case PERF_REG_ARM_IP: - return "ip"; - case PERF_REG_ARM_SP: - return "sp"; - case PERF_REG_ARM_LR: - return "lr"; - case PERF_REG_ARM_PC: - return "pc"; - default: - return NULL; - } - - return NULL; -} - #endif /* ARCH_PERF_REGS_H */ diff --git a/tools/perf/arch/arm64/include/perf_regs.h b/tools/perf/arch/arm64/include/perf_regs.h index 1f0d78b9f23b..35a3cc775b39 100644 --- a/tools/perf/arch/arm64/include/perf_regs.h +++ b/tools/perf/arch/arm64/include/perf_regs.h @@ -17,80 +17,4 @@ void perf_regs_load(u64 *regs); #define PERF_REG_IP PERF_REG_ARM64_PC #define PERF_REG_SP PERF_REG_ARM64_SP -static inline const char *__perf_reg_name(int id) -{ - switch (id) { - case PERF_REG_ARM64_X0: - return "x0"; - case PERF_REG_ARM64_X1: - return "x1"; - case PERF_REG_ARM64_X2: - return "x2"; - case PERF_REG_ARM64_X3: - return "x3"; - case PERF_REG_ARM64_X4: - return "x4"; - case PERF_REG_ARM64_X5: - return "x5"; - case PERF_REG_ARM64_X6: - return "x6"; - case PERF_REG_ARM64_X7: - return "x7"; - case PERF_REG_ARM64_X8: - return "x8"; - case PERF_REG_ARM64_X9: - return "x9"; - case PERF_REG_ARM64_X10: - return "x10"; - case PERF_REG_ARM64_X11: - return "x11"; - case PERF_REG_ARM64_X12: - return "x12"; - case PERF_REG_ARM64_X13: - return "x13"; - case PERF_REG_ARM64_X14: - return "x14"; - case PERF_REG_ARM64_X15: - return "x15"; - case PERF_REG_ARM64_X16: - return "x16"; - case PERF_REG_ARM64_X17: - return "x17"; - case PERF_REG_ARM64_X18: - return "x18"; - case PERF_REG_ARM64_X19: - return "x19"; - case PERF_REG_ARM64_X20: - return "x20"; - case PERF_REG_ARM64_X21: - return "x21"; - case PERF_REG_ARM64_X22: - return "x22"; - case PERF_REG_ARM64_X23: - return "x23"; - case PERF_REG_ARM64_X24: - return "x24"; - case PERF_REG_ARM64_X25: - return "x25"; - case PERF_REG_ARM64_X26: - return "x26"; - case PERF_REG_ARM64_X27: - return "x27"; - case PERF_REG_ARM64_X28: - return "x28"; - case PERF_REG_ARM64_X29: - return "x29"; - case PERF_REG_ARM64_SP: - return "sp"; - case PERF_REG_ARM64_LR: - return "lr"; - case PERF_REG_ARM64_PC: - return "pc"; - default: - return NULL; - } - - return NULL; -} - #endif /* ARCH_PERF_REGS_H */ diff --git a/tools/perf/arch/csky/include/perf_regs.h b/tools/perf/arch/csky/include/perf_regs.h index 25ac3bdcb9d1..1afcc0e916c2 100644 --- a/tools/perf/arch/csky/include/perf_regs.h +++ b/tools/perf/arch/csky/include/perf_regs.h @@ -15,86 +15,4 @@ #define PERF_REG_IP PERF_REG_CSKY_PC #define PERF_REG_SP PERF_REG_CSKY_SP -static inline const char *__perf_reg_name(int id) -{ - switch (id) { - case PERF_REG_CSKY_A0: - return "a0"; - case PERF_REG_CSKY_A1: - return "a1"; - case PERF_REG_CSKY_A2: - return "a2"; - case PERF_REG_CSKY_A3: - return "a3"; - case PERF_REG_CSKY_REGS0: - return "regs0"; - case PERF_REG_CSKY_REGS1: - return "regs1"; - case PERF_REG_CSKY_REGS2: - return "regs2"; - case PERF_REG_CSKY_REGS3: - return "regs3"; - case PERF_REG_CSKY_REGS4: - return "regs4"; - case PERF_REG_CSKY_REGS5: - return "regs5"; - case PERF_REG_CSKY_REGS6: - return "regs6"; - case PERF_REG_CSKY_REGS7: - return "regs7"; - case PERF_REG_CSKY_REGS8: - return "regs8"; - case PERF_REG_CSKY_REGS9: - return "regs9"; - case PERF_REG_CSKY_SP: - return "sp"; - case PERF_REG_CSKY_LR: - return "lr"; - case PERF_REG_CSKY_PC: - return "pc"; -#if defined(__CSKYABIV2__) - case PERF_REG_CSKY_EXREGS0: - return "exregs0"; - case PERF_REG_CSKY_EXREGS1: - return "exregs1"; - case PERF_REG_CSKY_EXREGS2: - return "exregs2"; - case PERF_REG_CSKY_EXREGS3: - return "exregs3"; - case PERF_REG_CSKY_EXREGS4: - return "exregs4"; - case PERF_REG_CSKY_EXREGS5: - return "exregs5"; - case PERF_REG_CSKY_EXREGS6: - return "exregs6"; - case PERF_REG_CSKY_EXREGS7: - return "exregs7"; - case PERF_REG_CSKY_EXREGS8: - return "exregs8"; - case PERF_REG_CSKY_EXREGS9: - return "exregs9"; - case PERF_REG_CSKY_EXREGS10: - return "exregs10"; - case PERF_REG_CSKY_EXREGS11: - return "exregs11"; - case PERF_REG_CSKY_EXREGS12: - return "exregs12"; - case PERF_REG_CSKY_EXREGS13: - return "exregs13"; - case PERF_REG_CSKY_EXREGS14: - return "exregs14"; - case PERF_REG_CSKY_TLS: - return "tls"; - case PERF_REG_CSKY_HI: - return "hi"; - case PERF_REG_CSKY_LO: - return "lo"; -#endif - default: - return NULL; - } - - return NULL; -} - #endif /* ARCH_PERF_REGS_H */ diff --git a/tools/perf/arch/mips/include/perf_regs.h b/tools/perf/arch/mips/include/perf_regs.h index ee73b36a14d1..b8cd8bbb37ba 100644 --- a/tools/perf/arch/mips/include/perf_regs.h +++ b/tools/perf/arch/mips/include/perf_regs.h @@ -12,73 +12,4 @@ #define PERF_REGS_MASK ((1ULL << PERF_REG_MIPS_MAX) - 1) -static inline const char *__perf_reg_name(int id) -{ - switch (id) { - case PERF_REG_MIPS_PC: - return "PC"; - case PERF_REG_MIPS_R1: - return "$1"; - case PERF_REG_MIPS_R2: - return "$2"; - case PERF_REG_MIPS_R3: - return "$3"; - case PERF_REG_MIPS_R4: - return "$4"; - case PERF_REG_MIPS_R5: - return "$5"; - case PERF_REG_MIPS_R6: - return "$6"; - case PERF_REG_MIPS_R7: - return "$7"; - case PERF_REG_MIPS_R8: - return "$8"; - case PERF_REG_MIPS_R9: - return "$9"; - case PERF_REG_MIPS_R10: - return "$10"; - case PERF_REG_MIPS_R11: - return "$11"; - case PERF_REG_MIPS_R12: - return "$12"; - case PERF_REG_MIPS_R13: - return "$13"; - case PERF_REG_MIPS_R14: - return "$14"; - case PERF_REG_MIPS_R15: - return "$15"; - case PERF_REG_MIPS_R16: - return "$16"; - case PERF_REG_MIPS_R17: - return "$17"; - case PERF_REG_MIPS_R18: - return "$18"; - case PERF_REG_MIPS_R19: - return "$19"; - case PERF_REG_MIPS_R20: - return "$20"; - case PERF_REG_MIPS_R21: - return "$21"; - case PERF_REG_MIPS_R22: - return "$22"; - case PERF_REG_MIPS_R23: - return "$23"; - case PERF_REG_MIPS_R24: - return "$24"; - case PERF_REG_MIPS_R25: - return "$25"; - case PERF_REG_MIPS_R28: - return "$28"; - case PERF_REG_MIPS_R29: - return "$29"; - case PERF_REG_MIPS_R30: - return "$30"; - case PERF_REG_MIPS_R31: - return "$31"; - default: - break; - } - return NULL; -} - #endif /* ARCH_PERF_REGS_H */ diff --git a/tools/perf/arch/powerpc/include/perf_regs.h b/tools/perf/arch/powerpc/include/perf_regs.h index 93339d17acc4..9bb17c3f370b 100644 --- a/tools/perf/arch/powerpc/include/perf_regs.h +++ b/tools/perf/arch/powerpc/include/perf_regs.h @@ -19,70 +19,4 @@ void perf_regs_load(u64 *regs); #define PERF_REG_IP PERF_REG_POWERPC_NIP #define PERF_REG_SP PERF_REG_POWERPC_R1 -static const char *reg_names[] = { - [PERF_REG_POWERPC_R0] = "r0", - [PERF_REG_POWERPC_R1] = "r1", - [PERF_REG_POWERPC_R2] = "r2", - [PERF_REG_POWERPC_R3] = "r3", - [PERF_REG_POWERPC_R4] = "r4", - [PERF_REG_POWERPC_R5] = "r5", - [PERF_REG_POWERPC_R6] = "r6", - [PERF_REG_POWERPC_R7] = "r7", - [PERF_REG_POWERPC_R8] = "r8", - [PERF_REG_POWERPC_R9] = "r9", - [PERF_REG_POWERPC_R10] = "r10", - [PERF_REG_POWERPC_R11] = "r11", - [PERF_REG_POWERPC_R12] = "r12", - [PERF_REG_POWERPC_R13] = "r13", - [PERF_REG_POWERPC_R14] = "r14", - [PERF_REG_POWERPC_R15] = "r15", - [PERF_REG_POWERPC_R16] = "r16", - [PERF_REG_POWERPC_R17] = "r17", - [PERF_REG_POWERPC_R18] = "r18", - [PERF_REG_POWERPC_R19] = "r19", - [PERF_REG_POWERPC_R20] = "r20", - [PERF_REG_POWERPC_R21] = "r21", - [PERF_REG_POWERPC_R22] = "r22", - [PERF_REG_POWERPC_R23] = "r23", - [PERF_REG_POWERPC_R24] = "r24", - [PERF_REG_POWERPC_R25] = "r25", - [PERF_REG_POWERPC_R26] = "r26", - [PERF_REG_POWERPC_R27] = "r27", - [PERF_REG_POWERPC_R28] = "r28", - [PERF_REG_POWERPC_R29] = "r29", - [PERF_REG_POWERPC_R30] = "r30", - [PERF_REG_POWERPC_R31] = "r31", - [PERF_REG_POWERPC_NIP] = "nip", - [PERF_REG_POWERPC_MSR] = "msr", - [PERF_REG_POWERPC_ORIG_R3] = "orig_r3", - [PERF_REG_POWERPC_CTR] = "ctr", - [PERF_REG_POWERPC_LINK] = "link", - [PERF_REG_POWERPC_XER] = "xer", - [PERF_REG_POWERPC_CCR] = "ccr", - [PERF_REG_POWERPC_SOFTE] = "softe", - [PERF_REG_POWERPC_TRAP] = "trap", - [PERF_REG_POWERPC_DAR] = "dar", - [PERF_REG_POWERPC_DSISR] = "dsisr", - [PERF_REG_POWERPC_SIER] = "sier", - [PERF_REG_POWERPC_MMCRA] = "mmcra", - [PERF_REG_POWERPC_MMCR0] = "mmcr0", - [PERF_REG_POWERPC_MMCR1] = "mmcr1", - [PERF_REG_POWERPC_MMCR2] = "mmcr2", - [PERF_REG_POWERPC_MMCR3] = "mmcr3", - [PERF_REG_POWERPC_SIER2] = "sier2", - [PERF_REG_POWERPC_SIER3] = "sier3", - [PERF_REG_POWERPC_PMC1] = "pmc1", - [PERF_REG_POWERPC_PMC2] = "pmc2", - [PERF_REG_POWERPC_PMC3] = "pmc3", - [PERF_REG_POWERPC_PMC4] = "pmc4", - [PERF_REG_POWERPC_PMC5] = "pmc5", - [PERF_REG_POWERPC_PMC6] = "pmc6", - [PERF_REG_POWERPC_SDAR] = "sdar", - [PERF_REG_POWERPC_SIAR] = "siar", -}; - -static inline const char *__perf_reg_name(int id) -{ - return reg_names[id]; -} #endif /* ARCH_PERF_REGS_H */ diff --git a/tools/perf/arch/riscv/include/perf_regs.h b/tools/perf/arch/riscv/include/perf_regs.h index 6b02a767c918..6944bf0de53e 100644 --- a/tools/perf/arch/riscv/include/perf_regs.h +++ b/tools/perf/arch/riscv/include/perf_regs.h @@ -19,78 +19,4 @@ #define PERF_REG_IP PERF_REG_RISCV_PC #define PERF_REG_SP PERF_REG_RISCV_SP -static inline const char *__perf_reg_name(int id) -{ - switch (id) { - case PERF_REG_RISCV_PC: - return "pc"; - case PERF_REG_RISCV_RA: - return "ra"; - case PERF_REG_RISCV_SP: - return "sp"; - case PERF_REG_RISCV_GP: - return "gp"; - case PERF_REG_RISCV_TP: - return "tp"; - case PERF_REG_RISCV_T0: - return "t0"; - case PERF_REG_RISCV_T1: - return "t1"; - case PERF_REG_RISCV_T2: - return "t2"; - case PERF_REG_RISCV_S0: - return "s0"; - case PERF_REG_RISCV_S1: - return "s1"; - case PERF_REG_RISCV_A0: - return "a0"; - case PERF_REG_RISCV_A1: - return "a1"; - case PERF_REG_RISCV_A2: - return "a2"; - case PERF_REG_RISCV_A3: - return "a3"; - case PERF_REG_RISCV_A4: - return "a4"; - case PERF_REG_RISCV_A5: - return "a5"; - case PERF_REG_RISCV_A6: - return "a6"; - case PERF_REG_RISCV_A7: - return "a7"; - case PERF_REG_RISCV_S2: - return "s2"; - case PERF_REG_RISCV_S3: - return "s3"; - case PERF_REG_RISCV_S4: - return "s4"; - case PERF_REG_RISCV_S5: - return "s5"; - case PERF_REG_RISCV_S6: - return "s6"; - case PERF_REG_RISCV_S7: - return "s7"; - case PERF_REG_RISCV_S8: - return "s8"; - case PERF_REG_RISCV_S9: - return "s9"; - case PERF_REG_RISCV_S10: - return "s10"; - case PERF_REG_RISCV_S11: - return "s11"; - case PERF_REG_RISCV_T3: - return "t3"; - case PERF_REG_RISCV_T4: - return "t4"; - case PERF_REG_RISCV_T5: - return "t5"; - case PERF_REG_RISCV_T6: - return "t6"; - default: - return NULL; - } - - return NULL; -} - #endif /* ARCH_PERF_REGS_H */ diff --git a/tools/perf/arch/s390/include/perf_regs.h b/tools/perf/arch/s390/include/perf_regs.h index ce3031526623..52fcc0891da6 100644 --- a/tools/perf/arch/s390/include/perf_regs.h +++ b/tools/perf/arch/s390/include/perf_regs.h @@ -14,82 +14,4 @@ void perf_regs_load(u64 *regs); #define PERF_REG_IP PERF_REG_S390_PC #define PERF_REG_SP PERF_REG_S390_R15 -static inline const char *__perf_reg_name(int id) -{ - switch (id) { - case PERF_REG_S390_R0: - return "R0"; - case PERF_REG_S390_R1: - return "R1"; - case PERF_REG_S390_R2: - return "R2"; - case PERF_REG_S390_R3: - return "R3"; - case PERF_REG_S390_R4: - return "R4"; - case PERF_REG_S390_R5: - return "R5"; - case PERF_REG_S390_R6: - return "R6"; - case PERF_REG_S390_R7: - return "R7"; - case PERF_REG_S390_R8: - return "R8"; - case PERF_REG_S390_R9: - return "R9"; - case PERF_REG_S390_R10: - return "R10"; - case PERF_REG_S390_R11: - return "R11"; - case PERF_REG_S390_R12: - return "R12"; - case PERF_REG_S390_R13: - return "R13"; - case PERF_REG_S390_R14: - return "R14"; - case PERF_REG_S390_R15: - return "R15"; - case PERF_REG_S390_FP0: - return "FP0"; - case PERF_REG_S390_FP1: - return "FP1"; - case PERF_REG_S390_FP2: - return "FP2"; - case PERF_REG_S390_FP3: - return "FP3"; - case PERF_REG_S390_FP4: - return "FP4"; - case PERF_REG_S390_FP5: - return "FP5"; - case PERF_REG_S390_FP6: - return "FP6"; - case PERF_REG_S390_FP7: - return "FP7"; - case PERF_REG_S390_FP8: - return "FP8"; - case PERF_REG_S390_FP9: - return "FP9"; - case PERF_REG_S390_FP10: - return "FP10"; - case PERF_REG_S390_FP11: - return "FP11"; - case PERF_REG_S390_FP12: - return "FP12"; - case PERF_REG_S390_FP13: - return "FP13"; - case PERF_REG_S390_FP14: - return "FP14"; - case PERF_REG_S390_FP15: - return "FP15"; - case PERF_REG_S390_MASK: - return "MASK"; - case PERF_REG_S390_PC: - return "PC"; - default: - return NULL; - } - - return NULL; -} - #endif /* ARCH_PERF_REGS_H */ diff --git a/tools/perf/arch/x86/include/perf_regs.h b/tools/perf/arch/x86/include/perf_regs.h index cddc4cdc0d9b..16e23b722042 100644 --- a/tools/perf/arch/x86/include/perf_regs.h +++ b/tools/perf/arch/x86/include/perf_regs.h @@ -23,86 +23,4 @@ void perf_regs_load(u64 *regs); #define PERF_REG_IP PERF_REG_X86_IP #define PERF_REG_SP PERF_REG_X86_SP -static inline const char *__perf_reg_name(int id) -{ - switch (id) { - case PERF_REG_X86_AX: - return "AX"; - case PERF_REG_X86_BX: - return "BX"; - case PERF_REG_X86_CX: - return "CX"; - case PERF_REG_X86_DX: - return "DX"; - case PERF_REG_X86_SI: - return "SI"; - case PERF_REG_X86_DI: - return "DI"; - case PERF_REG_X86_BP: - return "BP"; - case PERF_REG_X86_SP: - return "SP"; - case PERF_REG_X86_IP: - return "IP"; - case PERF_REG_X86_FLAGS: - return "FLAGS"; - case PERF_REG_X86_CS: - return "CS"; - case PERF_REG_X86_SS: - return "SS"; - case PERF_REG_X86_DS: - return "DS"; - case PERF_REG_X86_ES: - return "ES"; - case PERF_REG_X86_FS: - return "FS"; - case PERF_REG_X86_GS: - return "GS"; -#ifdef HAVE_ARCH_X86_64_SUPPORT - case PERF_REG_X86_R8: - return "R8"; - case PERF_REG_X86_R9: - return "R9"; - case PERF_REG_X86_R10: - return "R10"; - case PERF_REG_X86_R11: - return "R11"; - case PERF_REG_X86_R12: - return "R12"; - case PERF_REG_X86_R13: - return "R13"; - case PERF_REG_X86_R14: - return "R14"; - case PERF_REG_X86_R15: - return "R15"; -#endif /* HAVE_ARCH_X86_64_SUPPORT */ - -#define XMM(x) \ - case PERF_REG_X86_XMM ## x: \ - case PERF_REG_X86_XMM ## x + 1: \ - return "XMM" #x; - XMM(0) - XMM(1) - XMM(2) - XMM(3) - XMM(4) - XMM(5) - XMM(6) - XMM(7) - XMM(8) - XMM(9) - XMM(10) - XMM(11) - XMM(12) - XMM(13) - XMM(14) - XMM(15) -#undef XMM - default: - return NULL; - } - - return NULL; -} - #endif /* ARCH_PERF_REGS_H */ diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index 9434367af166..da2175d70ac9 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -15,6 +15,7 @@ #include "util/symbol.h" #include "util/thread.h" #include "util/trace-event.h" +#include "util/env.h" #include "util/evlist.h" #include "util/evsel.h" #include "util/evsel_fprintf.h" @@ -648,7 +649,7 @@ out: return 0; } -static int perf_sample__fprintf_regs(struct regs_dump *regs, uint64_t mask, +static int perf_sample__fprintf_regs(struct regs_dump *regs, uint64_t mask, const char *arch, FILE *fp) { unsigned i = 0, r; @@ -661,7 +662,7 @@ static int perf_sample__fprintf_regs(struct regs_dump *regs, uint64_t mask, for_each_set_bit(r, (unsigned long *) &mask, sizeof(mask) * 8) { u64 val = regs->regs[i++]; - printed += fprintf(fp, "%5s:0x%"PRIx64" ", perf_reg_name(r), val); + printed += fprintf(fp, "%5s:0x%"PRIx64" ", perf_reg_name(r, arch), val); } return printed; @@ -718,17 +719,17 @@ tod_scnprintf(struct perf_script *script, char *buf, int buflen, } static int perf_sample__fprintf_iregs(struct perf_sample *sample, - struct perf_event_attr *attr, FILE *fp) + struct perf_event_attr *attr, const char *arch, FILE *fp) { return perf_sample__fprintf_regs(&sample->intr_regs, - attr->sample_regs_intr, fp); + attr->sample_regs_intr, arch, fp); } static int perf_sample__fprintf_uregs(struct perf_sample *sample, - struct perf_event_attr *attr, FILE *fp) + struct perf_event_attr *attr, const char *arch, FILE *fp) { return perf_sample__fprintf_regs(&sample->user_regs, - attr->sample_regs_user, fp); + attr->sample_regs_user, arch, fp); } static int perf_sample__fprintf_start(struct perf_script *script, @@ -2000,6 +2001,7 @@ static void process_event(struct perf_script *script, struct evsel_script *es = evsel->priv; FILE *fp = es->fp; char str[PAGE_SIZE_NAME_LEN]; + const char *arch = perf_env__arch(machine->env); if (output[type].fields == 0) return; @@ -2066,10 +2068,10 @@ static void process_event(struct perf_script *script, } if (PRINT_FIELD(IREGS)) - perf_sample__fprintf_iregs(sample, attr, fp); + perf_sample__fprintf_iregs(sample, attr, arch, fp); if (PRINT_FIELD(UREGS)) - perf_sample__fprintf_uregs(sample, attr, fp); + perf_sample__fprintf_uregs(sample, attr, arch, fp); if (PRINT_FIELD(BRSTACK)) perf_sample__fprintf_brstack(sample, thread, attr, fp); diff --git a/tools/perf/util/perf_regs.c b/tools/perf/util/perf_regs.c index 06a7461ba864..a982e40ee5a9 100644 --- a/tools/perf/util/perf_regs.c +++ b/tools/perf/util/perf_regs.c @@ -1,5 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 #include +#include #include "perf_regs.h" #include "event.h" @@ -20,6 +21,671 @@ uint64_t __weak arch__user_reg_mask(void) } #ifdef HAVE_PERF_REGS_SUPPORT + +#define perf_event_arm_regs perf_event_arm64_regs +#include "../../arch/arm64/include/uapi/asm/perf_regs.h" +#undef perf_event_arm_regs + +#include "../../arch/arm/include/uapi/asm/perf_regs.h" +#include "../../arch/csky/include/uapi/asm/perf_regs.h" +#include "../../arch/mips/include/uapi/asm/perf_regs.h" +#include "../../arch/powerpc/include/uapi/asm/perf_regs.h" +#include "../../arch/riscv/include/uapi/asm/perf_regs.h" +#include "../../arch/s390/include/uapi/asm/perf_regs.h" +#include "../../arch/x86/include/uapi/asm/perf_regs.h" + +static const char *__perf_reg_name_arm64(int id) +{ + switch (id) { + case PERF_REG_ARM64_X0: + return "x0"; + case PERF_REG_ARM64_X1: + return "x1"; + case PERF_REG_ARM64_X2: + return "x2"; + case PERF_REG_ARM64_X3: + return "x3"; + case PERF_REG_ARM64_X4: + return "x4"; + case PERF_REG_ARM64_X5: + return "x5"; + case PERF_REG_ARM64_X6: + return "x6"; + case PERF_REG_ARM64_X7: + return "x7"; + case PERF_REG_ARM64_X8: + return "x8"; + case PERF_REG_ARM64_X9: + return "x9"; + case PERF_REG_ARM64_X10: + return "x10"; + case PERF_REG_ARM64_X11: + return "x11"; + case PERF_REG_ARM64_X12: + return "x12"; + case PERF_REG_ARM64_X13: + return "x13"; + case PERF_REG_ARM64_X14: + return "x14"; + case PERF_REG_ARM64_X15: + return "x15"; + case PERF_REG_ARM64_X16: + return "x16"; + case PERF_REG_ARM64_X17: + return "x17"; + case PERF_REG_ARM64_X18: + return "x18"; + case PERF_REG_ARM64_X19: + return "x19"; + case PERF_REG_ARM64_X20: + return "x20"; + case PERF_REG_ARM64_X21: + return "x21"; + case PERF_REG_ARM64_X22: + return "x22"; + case PERF_REG_ARM64_X23: + return "x23"; + case PERF_REG_ARM64_X24: + return "x24"; + case PERF_REG_ARM64_X25: + return "x25"; + case PERF_REG_ARM64_X26: + return "x26"; + case PERF_REG_ARM64_X27: + return "x27"; + case PERF_REG_ARM64_X28: + return "x28"; + case PERF_REG_ARM64_X29: + return "x29"; + case PERF_REG_ARM64_SP: + return "sp"; + case PERF_REG_ARM64_LR: + return "lr"; + case PERF_REG_ARM64_PC: + return "pc"; + default: + return NULL; + } + + return NULL; +} + +static const char *__perf_reg_name_arm(int id) +{ + switch (id) { + case PERF_REG_ARM_R0: + return "r0"; + case PERF_REG_ARM_R1: + return "r1"; + case PERF_REG_ARM_R2: + return "r2"; + case PERF_REG_ARM_R3: + return "r3"; + case PERF_REG_ARM_R4: + return "r4"; + case PERF_REG_ARM_R5: + return "r5"; + case PERF_REG_ARM_R6: + return "r6"; + case PERF_REG_ARM_R7: + return "r7"; + case PERF_REG_ARM_R8: + return "r8"; + case PERF_REG_ARM_R9: + return "r9"; + case PERF_REG_ARM_R10: + return "r10"; + case PERF_REG_ARM_FP: + return "fp"; + case PERF_REG_ARM_IP: + return "ip"; + case PERF_REG_ARM_SP: + return "sp"; + case PERF_REG_ARM_LR: + return "lr"; + case PERF_REG_ARM_PC: + return "pc"; + default: + return NULL; + } + + return NULL; +} + +static const char *__perf_reg_name_csky(int id) +{ + switch (id) { + case PERF_REG_CSKY_A0: + return "a0"; + case PERF_REG_CSKY_A1: + return "a1"; + case PERF_REG_CSKY_A2: + return "a2"; + case PERF_REG_CSKY_A3: + return "a3"; + case PERF_REG_CSKY_REGS0: + return "regs0"; + case PERF_REG_CSKY_REGS1: + return "regs1"; + case PERF_REG_CSKY_REGS2: + return "regs2"; + case PERF_REG_CSKY_REGS3: + return "regs3"; + case PERF_REG_CSKY_REGS4: + return "regs4"; + case PERF_REG_CSKY_REGS5: + return "regs5"; + case PERF_REG_CSKY_REGS6: + return "regs6"; + case PERF_REG_CSKY_REGS7: + return "regs7"; + case PERF_REG_CSKY_REGS8: + return "regs8"; + case PERF_REG_CSKY_REGS9: + return "regs9"; + case PERF_REG_CSKY_SP: + return "sp"; + case PERF_REG_CSKY_LR: + return "lr"; + case PERF_REG_CSKY_PC: + return "pc"; +#if defined(__CSKYABIV2__) + case PERF_REG_CSKY_EXREGS0: + return "exregs0"; + case PERF_REG_CSKY_EXREGS1: + return "exregs1"; + case PERF_REG_CSKY_EXREGS2: + return "exregs2"; + case PERF_REG_CSKY_EXREGS3: + return "exregs3"; + case PERF_REG_CSKY_EXREGS4: + return "exregs4"; + case PERF_REG_CSKY_EXREGS5: + return "exregs5"; + case PERF_REG_CSKY_EXREGS6: + return "exregs6"; + case PERF_REG_CSKY_EXREGS7: + return "exregs7"; + case PERF_REG_CSKY_EXREGS8: + return "exregs8"; + case PERF_REG_CSKY_EXREGS9: + return "exregs9"; + case PERF_REG_CSKY_EXREGS10: + return "exregs10"; + case PERF_REG_CSKY_EXREGS11: + return "exregs11"; + case PERF_REG_CSKY_EXREGS12: + return "exregs12"; + case PERF_REG_CSKY_EXREGS13: + return "exregs13"; + case PERF_REG_CSKY_EXREGS14: + return "exregs14"; + case PERF_REG_CSKY_TLS: + return "tls"; + case PERF_REG_CSKY_HI: + return "hi"; + case PERF_REG_CSKY_LO: + return "lo"; +#endif + default: + return NULL; + } + + return NULL; +} + +static const char *__perf_reg_name_mips(int id) +{ + switch (id) { + case PERF_REG_MIPS_PC: + return "PC"; + case PERF_REG_MIPS_R1: + return "$1"; + case PERF_REG_MIPS_R2: + return "$2"; + case PERF_REG_MIPS_R3: + return "$3"; + case PERF_REG_MIPS_R4: + return "$4"; + case PERF_REG_MIPS_R5: + return "$5"; + case PERF_REG_MIPS_R6: + return "$6"; + case PERF_REG_MIPS_R7: + return "$7"; + case PERF_REG_MIPS_R8: + return "$8"; + case PERF_REG_MIPS_R9: + return "$9"; + case PERF_REG_MIPS_R10: + return "$10"; + case PERF_REG_MIPS_R11: + return "$11"; + case PERF_REG_MIPS_R12: + return "$12"; + case PERF_REG_MIPS_R13: + return "$13"; + case PERF_REG_MIPS_R14: + return "$14"; + case PERF_REG_MIPS_R15: + return "$15"; + case PERF_REG_MIPS_R16: + return "$16"; + case PERF_REG_MIPS_R17: + return "$17"; + case PERF_REG_MIPS_R18: + return "$18"; + case PERF_REG_MIPS_R19: + return "$19"; + case PERF_REG_MIPS_R20: + return "$20"; + case PERF_REG_MIPS_R21: + return "$21"; + case PERF_REG_MIPS_R22: + return "$22"; + case PERF_REG_MIPS_R23: + return "$23"; + case PERF_REG_MIPS_R24: + return "$24"; + case PERF_REG_MIPS_R25: + return "$25"; + case PERF_REG_MIPS_R28: + return "$28"; + case PERF_REG_MIPS_R29: + return "$29"; + case PERF_REG_MIPS_R30: + return "$30"; + case PERF_REG_MIPS_R31: + return "$31"; + default: + break; + } + return NULL; +} + +static const char *__perf_reg_name_powerpc(int id) +{ + switch (id) { + case PERF_REG_POWERPC_R0: + return "r0"; + case PERF_REG_POWERPC_R1: + return "r1"; + case PERF_REG_POWERPC_R2: + return "r2"; + case PERF_REG_POWERPC_R3: + return "r3"; + case PERF_REG_POWERPC_R4: + return "r4"; + case PERF_REG_POWERPC_R5: + return "r5"; + case PERF_REG_POWERPC_R6: + return "r6"; + case PERF_REG_POWERPC_R7: + return "r7"; + case PERF_REG_POWERPC_R8: + return "r8"; + case PERF_REG_POWERPC_R9: + return "r9"; + case PERF_REG_POWERPC_R10: + return "r10"; + case PERF_REG_POWERPC_R11: + return "r11"; + case PERF_REG_POWERPC_R12: + return "r12"; + case PERF_REG_POWERPC_R13: + return "r13"; + case PERF_REG_POWERPC_R14: + return "r14"; + case PERF_REG_POWERPC_R15: + return "r15"; + case PERF_REG_POWERPC_R16: + return "r16"; + case PERF_REG_POWERPC_R17: + return "r17"; + case PERF_REG_POWERPC_R18: + return "r18"; + case PERF_REG_POWERPC_R19: + return "r19"; + case PERF_REG_POWERPC_R20: + return "r20"; + case PERF_REG_POWERPC_R21: + return "r21"; + case PERF_REG_POWERPC_R22: + return "r22"; + case PERF_REG_POWERPC_R23: + return "r23"; + case PERF_REG_POWERPC_R24: + return "r24"; + case PERF_REG_POWERPC_R25: + return "r25"; + case PERF_REG_POWERPC_R26: + return "r26"; + case PERF_REG_POWERPC_R27: + return "r27"; + case PERF_REG_POWERPC_R28: + return "r28"; + case PERF_REG_POWERPC_R29: + return "r29"; + case PERF_REG_POWERPC_R30: + return "r30"; + case PERF_REG_POWERPC_R31: + return "r31"; + case PERF_REG_POWERPC_NIP: + return "nip"; + case PERF_REG_POWERPC_MSR: + return "msr"; + case PERF_REG_POWERPC_ORIG_R3: + return "orig_r3"; + case PERF_REG_POWERPC_CTR: + return "ctr"; + case PERF_REG_POWERPC_LINK: + return "link"; + case PERF_REG_POWERPC_XER: + return "xer"; + case PERF_REG_POWERPC_CCR: + return "ccr"; + case PERF_REG_POWERPC_SOFTE: + return "softe"; + case PERF_REG_POWERPC_TRAP: + return "trap"; + case PERF_REG_POWERPC_DAR: + return "dar"; + case PERF_REG_POWERPC_DSISR: + return "dsisr"; + case PERF_REG_POWERPC_SIER: + return "sier"; + case PERF_REG_POWERPC_MMCRA: + return "mmcra"; + case PERF_REG_POWERPC_MMCR0: + return "mmcr0"; + case PERF_REG_POWERPC_MMCR1: + return "mmcr1"; + case PERF_REG_POWERPC_MMCR2: + return "mmcr2"; + case PERF_REG_POWERPC_MMCR3: + return "mmcr3"; + case PERF_REG_POWERPC_SIER2: + return "sier2"; + case PERF_REG_POWERPC_SIER3: + return "sier3"; + case PERF_REG_POWERPC_PMC1: + return "pmc1"; + case PERF_REG_POWERPC_PMC2: + return "pmc2"; + case PERF_REG_POWERPC_PMC3: + return "pmc3"; + case PERF_REG_POWERPC_PMC4: + return "pmc4"; + case PERF_REG_POWERPC_PMC5: + return "pmc5"; + case PERF_REG_POWERPC_PMC6: + return "pmc6"; + case PERF_REG_POWERPC_SDAR: + return "sdar"; + case PERF_REG_POWERPC_SIAR: + return "siar"; + default: + break; + } + return NULL; +} + +static const char *__perf_reg_name_riscv(int id) +{ + switch (id) { + case PERF_REG_RISCV_PC: + return "pc"; + case PERF_REG_RISCV_RA: + return "ra"; + case PERF_REG_RISCV_SP: + return "sp"; + case PERF_REG_RISCV_GP: + return "gp"; + case PERF_REG_RISCV_TP: + return "tp"; + case PERF_REG_RISCV_T0: + return "t0"; + case PERF_REG_RISCV_T1: + return "t1"; + case PERF_REG_RISCV_T2: + return "t2"; + case PERF_REG_RISCV_S0: + return "s0"; + case PERF_REG_RISCV_S1: + return "s1"; + case PERF_REG_RISCV_A0: + return "a0"; + case PERF_REG_RISCV_A1: + return "a1"; + case PERF_REG_RISCV_A2: + return "a2"; + case PERF_REG_RISCV_A3: + return "a3"; + case PERF_REG_RISCV_A4: + return "a4"; + case PERF_REG_RISCV_A5: + return "a5"; + case PERF_REG_RISCV_A6: + return "a6"; + case PERF_REG_RISCV_A7: + return "a7"; + case PERF_REG_RISCV_S2: + return "s2"; + case PERF_REG_RISCV_S3: + return "s3"; + case PERF_REG_RISCV_S4: + return "s4"; + case PERF_REG_RISCV_S5: + return "s5"; + case PERF_REG_RISCV_S6: + return "s6"; + case PERF_REG_RISCV_S7: + return "s7"; + case PERF_REG_RISCV_S8: + return "s8"; + case PERF_REG_RISCV_S9: + return "s9"; + case PERF_REG_RISCV_S10: + return "s10"; + case PERF_REG_RISCV_S11: + return "s11"; + case PERF_REG_RISCV_T3: + return "t3"; + case PERF_REG_RISCV_T4: + return "t4"; + case PERF_REG_RISCV_T5: + return "t5"; + case PERF_REG_RISCV_T6: + return "t6"; + default: + return NULL; + } + + return NULL; +} + +static const char *__perf_reg_name_s390(int id) +{ + switch (id) { + case PERF_REG_S390_R0: + return "R0"; + case PERF_REG_S390_R1: + return "R1"; + case PERF_REG_S390_R2: + return "R2"; + case PERF_REG_S390_R3: + return "R3"; + case PERF_REG_S390_R4: + return "R4"; + case PERF_REG_S390_R5: + return "R5"; + case PERF_REG_S390_R6: + return "R6"; + case PERF_REG_S390_R7: + return "R7"; + case PERF_REG_S390_R8: + return "R8"; + case PERF_REG_S390_R9: + return "R9"; + case PERF_REG_S390_R10: + return "R10"; + case PERF_REG_S390_R11: + return "R11"; + case PERF_REG_S390_R12: + return "R12"; + case PERF_REG_S390_R13: + return "R13"; + case PERF_REG_S390_R14: + return "R14"; + case PERF_REG_S390_R15: + return "R15"; + case PERF_REG_S390_FP0: + return "FP0"; + case PERF_REG_S390_FP1: + return "FP1"; + case PERF_REG_S390_FP2: + return "FP2"; + case PERF_REG_S390_FP3: + return "FP3"; + case PERF_REG_S390_FP4: + return "FP4"; + case PERF_REG_S390_FP5: + return "FP5"; + case PERF_REG_S390_FP6: + return "FP6"; + case PERF_REG_S390_FP7: + return "FP7"; + case PERF_REG_S390_FP8: + return "FP8"; + case PERF_REG_S390_FP9: + return "FP9"; + case PERF_REG_S390_FP10: + return "FP10"; + case PERF_REG_S390_FP11: + return "FP11"; + case PERF_REG_S390_FP12: + return "FP12"; + case PERF_REG_S390_FP13: + return "FP13"; + case PERF_REG_S390_FP14: + return "FP14"; + case PERF_REG_S390_FP15: + return "FP15"; + case PERF_REG_S390_MASK: + return "MASK"; + case PERF_REG_S390_PC: + return "PC"; + default: + return NULL; + } + + return NULL; +} + +static const char *__perf_reg_name_x86(int id) +{ + switch (id) { + case PERF_REG_X86_AX: + return "AX"; + case PERF_REG_X86_BX: + return "BX"; + case PERF_REG_X86_CX: + return "CX"; + case PERF_REG_X86_DX: + return "DX"; + case PERF_REG_X86_SI: + return "SI"; + case PERF_REG_X86_DI: + return "DI"; + case PERF_REG_X86_BP: + return "BP"; + case PERF_REG_X86_SP: + return "SP"; + case PERF_REG_X86_IP: + return "IP"; + case PERF_REG_X86_FLAGS: + return "FLAGS"; + case PERF_REG_X86_CS: + return "CS"; + case PERF_REG_X86_SS: + return "SS"; + case PERF_REG_X86_DS: + return "DS"; + case PERF_REG_X86_ES: + return "ES"; + case PERF_REG_X86_FS: + return "FS"; + case PERF_REG_X86_GS: + return "GS"; + case PERF_REG_X86_R8: + return "R8"; + case PERF_REG_X86_R9: + return "R9"; + case PERF_REG_X86_R10: + return "R10"; + case PERF_REG_X86_R11: + return "R11"; + case PERF_REG_X86_R12: + return "R12"; + case PERF_REG_X86_R13: + return "R13"; + case PERF_REG_X86_R14: + return "R14"; + case PERF_REG_X86_R15: + return "R15"; + +#define XMM(x) \ + case PERF_REG_X86_XMM ## x: \ + case PERF_REG_X86_XMM ## x + 1: \ + return "XMM" #x; + XMM(0) + XMM(1) + XMM(2) + XMM(3) + XMM(4) + XMM(5) + XMM(6) + XMM(7) + XMM(8) + XMM(9) + XMM(10) + XMM(11) + XMM(12) + XMM(13) + XMM(14) + XMM(15) +#undef XMM + default: + return NULL; + } + + return NULL; +} + +const char *perf_reg_name(int id, const char *arch) +{ + const char *reg_name = NULL; + + if (!strcmp(arch, "csky")) + reg_name = __perf_reg_name_csky(id); + else if (!strcmp(arch, "mips")) + reg_name = __perf_reg_name_mips(id); + else if (!strcmp(arch, "powerpc")) + reg_name = __perf_reg_name_powerpc(id); + else if (!strcmp(arch, "riscv")) + reg_name = __perf_reg_name_riscv(id); + else if (!strcmp(arch, "s390")) + reg_name = __perf_reg_name_s390(id); + else if (!strcmp(arch, "x86")) + reg_name = __perf_reg_name_x86(id); + else if (!strcmp(arch, "arm")) + reg_name = __perf_reg_name_arm(id); + else if (!strcmp(arch, "arm64")) + reg_name = __perf_reg_name_arm64(id); + + return reg_name ?: "unknown"; +} + int perf_reg_value(u64 *valp, struct regs_dump *regs, int id) { int i, idx = 0; diff --git a/tools/perf/util/perf_regs.h b/tools/perf/util/perf_regs.h index eeac181ebccf..4e6b1299c571 100644 --- a/tools/perf/util/perf_regs.h +++ b/tools/perf/util/perf_regs.h @@ -31,22 +31,16 @@ extern const struct sample_reg sample_reg_masks[]; #define DWARF_MINIMAL_REGS ((1ULL << PERF_REG_IP) | (1ULL << PERF_REG_SP)) +const char *perf_reg_name(int id, const char *arch); int perf_reg_value(u64 *valp, struct regs_dump *regs, int id); -static inline const char *perf_reg_name(int id) -{ - const char *reg_name = __perf_reg_name(id); - - return reg_name ?: "unknown"; -} - #else #define PERF_REGS_MASK 0 #define PERF_REGS_MAX 0 #define DWARF_MINIMAL_REGS PERF_REGS_MASK -static inline const char *perf_reg_name(int id __maybe_unused) +static inline const char *perf_reg_name(int id __maybe_unused, const char *arch __maybe_unused) { return "unknown"; } diff --git a/tools/perf/util/scripting-engines/trace-event-python.c b/tools/perf/util/scripting-engines/trace-event-python.c index c0c010350bc2..0445bee9290f 100644 --- a/tools/perf/util/scripting-engines/trace-event-python.c +++ b/tools/perf/util/scripting-engines/trace-event-python.c @@ -36,6 +36,7 @@ #include "../debug.h" #include "../dso.h" #include "../callchain.h" +#include "../env.h" #include "../evsel.h" #include "../event.h" #include "../thread.h" @@ -687,7 +688,7 @@ static void set_sample_datasrc_in_dict(PyObject *dict, _PyUnicode_FromString(decode)); } -static void regs_map(struct regs_dump *regs, uint64_t mask, char *bf, int size) +static void regs_map(struct regs_dump *regs, uint64_t mask, const char *arch, char *bf, int size) { unsigned int i = 0, r; int printed = 0; @@ -702,7 +703,7 @@ static void regs_map(struct regs_dump *regs, uint64_t mask, char *bf, int size) printed += scnprintf(bf + printed, size - printed, "%5s:0x%" PRIx64 " ", - perf_reg_name(r), val); + perf_reg_name(r, arch), val); } } @@ -711,6 +712,7 @@ static void set_regs_in_dict(PyObject *dict, struct evsel *evsel) { struct perf_event_attr *attr = &evsel->core.attr; + const char *arch = perf_env__arch(evsel__env(evsel)); /* * Here value 28 is a constant size which can be used to print @@ -722,12 +724,12 @@ static void set_regs_in_dict(PyObject *dict, int size = __sw_hweight64(attr->sample_regs_intr) * 28; char bf[size]; - regs_map(&sample->intr_regs, attr->sample_regs_intr, bf, sizeof(bf)); + regs_map(&sample->intr_regs, attr->sample_regs_intr, arch, bf, sizeof(bf)); pydict_set_item_string_decref(dict, "iregs", _PyUnicode_FromString(bf)); - regs_map(&sample->user_regs, attr->sample_regs_user, bf, sizeof(bf)); + regs_map(&sample->user_regs, attr->sample_regs_user, arch, bf, sizeof(bf)); pydict_set_item_string_decref(dict, "uregs", _PyUnicode_FromString(bf)); diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index d8857d1b6d7c..e1a273048681 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -15,6 +15,7 @@ #include "map_symbol.h" #include "branch.h" #include "debug.h" +#include "env.h" #include "evlist.h" #include "evsel.h" #include "memswap.h" @@ -1168,7 +1169,7 @@ static void branch_stack__printf(struct perf_sample *sample, bool callstack) } } -static void regs_dump__printf(u64 mask, u64 *regs) +static void regs_dump__printf(u64 mask, u64 *regs, const char *arch) { unsigned rid, i = 0; @@ -1176,7 +1177,7 @@ static void regs_dump__printf(u64 mask, u64 *regs) u64 val = regs[i++]; printf(".... %-5s 0x%016" PRIx64 "\n", - perf_reg_name(rid), val); + perf_reg_name(rid, arch), val); } } @@ -1194,7 +1195,7 @@ static inline const char *regs_dump_abi(struct regs_dump *d) return regs_abi[d->abi]; } -static void regs__printf(const char *type, struct regs_dump *regs) +static void regs__printf(const char *type, struct regs_dump *regs, const char *arch) { u64 mask = regs->mask; @@ -1203,23 +1204,23 @@ static void regs__printf(const char *type, struct regs_dump *regs) mask, regs_dump_abi(regs)); - regs_dump__printf(mask, regs->regs); + regs_dump__printf(mask, regs->regs, arch); } -static void regs_user__printf(struct perf_sample *sample) +static void regs_user__printf(struct perf_sample *sample, const char *arch) { struct regs_dump *user_regs = &sample->user_regs; if (user_regs->regs) - regs__printf("user", user_regs); + regs__printf("user", user_regs, arch); } -static void regs_intr__printf(struct perf_sample *sample) +static void regs_intr__printf(struct perf_sample *sample, const char *arch) { struct regs_dump *intr_regs = &sample->intr_regs; if (intr_regs->regs) - regs__printf("intr", intr_regs); + regs__printf("intr", intr_regs, arch); } static void stack_user__printf(struct stack_dump *dump) @@ -1304,7 +1305,7 @@ char *get_page_size_name(u64 size, char *str) } static void dump_sample(struct evsel *evsel, union perf_event *event, - struct perf_sample *sample) + struct perf_sample *sample, const char *arch) { u64 sample_type; char str[PAGE_SIZE_NAME_LEN]; @@ -1325,10 +1326,10 @@ static void dump_sample(struct evsel *evsel, union perf_event *event, branch_stack__printf(sample, evsel__has_branch_callstack(evsel)); if (sample_type & PERF_SAMPLE_REGS_USER) - regs_user__printf(sample); + regs_user__printf(sample, arch); if (sample_type & PERF_SAMPLE_REGS_INTR) - regs_intr__printf(sample); + regs_intr__printf(sample, arch); if (sample_type & PERF_SAMPLE_STACK_USER) stack_user__printf(&sample->user_stack); @@ -1502,7 +1503,7 @@ static int machines__deliver_event(struct machines *machines, ++evlist->stats.nr_unknown_id; return 0; } - dump_sample(evsel, event, sample); + dump_sample(evsel, event, sample, perf_env__arch(machine->env)); if (machine == NULL) { ++evlist->stats.nr_unprocessable_samples; return 0; From 416e15ad17f84358ba3eca6b82378be97c793c62 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Wed, 15 Dec 2021 10:51:50 -0800 Subject: [PATCH 078/493] perf ftrace: Add 'trace' subcommand This is a preparation to add more sub-commands for ftrace. The 'trace' subcommand does the same thing when no subcommand is given. Committer testing: The previous mode, i.e. no subcommand and the new 'perf ftrace trace' are equivalent: # perf ftrace -G check_preempt_curr sleep 0.00001 # tracer: function_graph # # CPU DURATION FUNCTION CALLS # | | | | | | | 25) | check_preempt_curr() { 25) | resched_curr() { 25) | native_smp_send_reschedule() { 25) | default_send_IPI_single_phys() { 25) 0.110 us | __default_send_IPI_dest_field(); 25) 0.490 us | } 25) 0.640 us | } 25) 0.850 us | } 25) 2.060 us | } # perf ftrace trace -G check_preempt_curr sleep 0.00001 # tracer: function_graph # # CPU DURATION FUNCTION CALLS # | | | | | | | 10) | check_preempt_curr() { 10) | resched_curr() { 10) | native_smp_send_reschedule() { 10) | default_send_IPI_single_phys() { 10) 0.080 us | __default_send_IPI_dest_field(); 10) 0.460 us | } 10) 0.610 us | } 10) 0.830 us | } 10) 2.020 us | } # Signed-off-by: Namhyung Kim Tested-by: Arnaldo Carvalho de Melo Cc: Andi Kleen Cc: Athira Jajeev Cc: Changbin Du Cc: Ian Rogers Cc: Ingo Molnar Cc: Jiri Olsa Cc: Peter Zijlstra Cc: Song Liu Cc: Stephane Eranian Link: https://lore.kernel.org/r/20211215185154.360314-2-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-ftrace.c | 35 +++++++++++++++++++++++------------ 1 file changed, 23 insertions(+), 12 deletions(-) diff --git a/tools/perf/builtin-ftrace.c b/tools/perf/builtin-ftrace.c index 87cb11a7a3ee..b28e762c5d54 100644 --- a/tools/perf/builtin-ftrace.c +++ b/tools/perf/builtin-ftrace.c @@ -879,17 +879,7 @@ int cmd_ftrace(int argc, const char **argv) .tracer = DEFAULT_TRACER, .target = { .uid = UINT_MAX, }, }; - const char * const ftrace_usage[] = { - "perf ftrace [] []", - "perf ftrace [] -- []", - NULL - }; - const struct option ftrace_options[] = { - OPT_STRING('t', "tracer", &ftrace.tracer, "tracer", - "Tracer to use: function_graph(default) or function"), - OPT_CALLBACK_DEFAULT('F', "funcs", NULL, "[FILTER]", - "Show available functions to filter", - opt_list_avail_functions, "*"), + const struct option common_options[] = { OPT_STRING('p', "pid", &ftrace.target.pid, "pid", "Trace on existing process id"), /* TODO: Add short option -t after -t/--tracer can be removed. */ @@ -901,6 +891,14 @@ int cmd_ftrace(int argc, const char **argv) "System-wide collection from all CPUs"), OPT_STRING('C', "cpu", &ftrace.target.cpu_list, "cpu", "List of cpus to monitor"), + OPT_END() + }; + const struct option ftrace_options[] = { + OPT_STRING('t', "tracer", &ftrace.tracer, "tracer", + "Tracer to use: function_graph(default) or function"), + OPT_CALLBACK_DEFAULT('F', "funcs", NULL, "[FILTER]", + "Show available functions to filter", + opt_list_avail_functions, "*"), OPT_CALLBACK('T', "trace-funcs", &ftrace.filters, "func", "Trace given functions using function tracer", parse_filter_func), @@ -923,7 +921,15 @@ int cmd_ftrace(int argc, const char **argv) "Trace children processes"), OPT_UINTEGER('D', "delay", &ftrace.initial_delay, "Number of milliseconds to wait before starting tracing after program start"), - OPT_END() + OPT_PARENT(common_options), + }; + + const char * const ftrace_usage[] = { + "perf ftrace [] []", + "perf ftrace [] -- [] []", + "perf ftrace trace [] []", + "perf ftrace trace [] -- [] []", + NULL }; INIT_LIST_HEAD(&ftrace.filters); @@ -935,6 +941,11 @@ int cmd_ftrace(int argc, const char **argv) if (ret < 0) return -1; + if (argc > 1 && !strcmp(argv[1], "trace")) { + argc--; + argv++; + } + argc = parse_options(argc, argv, ftrace_options, ftrace_usage, PARSE_OPT_STOP_AT_NON_OPTION); if (!argc && target__none(&ftrace.target)) From a9b8ae8ae347941fefd6596f62586b13ae032e4b Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Wed, 15 Dec 2021 10:51:51 -0800 Subject: [PATCH 079/493] perf ftrace: Move out common code from __cmd_ftrace The signal setup code and evlist__prepare_workload() can be used for other subcommands. Let's move them out of the __cmd_ftrace(). Then it doesn't need to pass argc and argv. On the other hand, select_tracer() is specific to the 'trace' subcommand so it'd better moving it into the __cmd_ftrace(). Signed-off-by: Namhyung Kim Tested-by: Arnaldo Carvalho de Melo Cc: Andi Kleen Cc: Athira Jajeev Cc: Changbin Du Cc: Ian Rogers Cc: Ingo Molnar Cc: Jiri Olsa Cc: Peter Zijlstra Cc: Song Liu Cc: Stephane Eranian Link: https://lore.kernel.org/r/20211215185154.360314-3-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-ftrace.c | 63 +++++++++++++++++++------------------ 1 file changed, 33 insertions(+), 30 deletions(-) diff --git a/tools/perf/builtin-ftrace.c b/tools/perf/builtin-ftrace.c index b28e762c5d54..0f8310bd0e6c 100644 --- a/tools/perf/builtin-ftrace.c +++ b/tools/perf/builtin-ftrace.c @@ -565,7 +565,24 @@ static int set_tracing_options(struct perf_ftrace *ftrace) return 0; } -static int __cmd_ftrace(struct perf_ftrace *ftrace, int argc, const char **argv) +static void select_tracer(struct perf_ftrace *ftrace) +{ + bool graph = !list_empty(&ftrace->graph_funcs) || + !list_empty(&ftrace->nograph_funcs); + bool func = !list_empty(&ftrace->filters) || + !list_empty(&ftrace->notrace); + + /* The function_graph has priority over function tracer. */ + if (graph) + ftrace->tracer = "function_graph"; + else if (func) + ftrace->tracer = "function"; + /* Otherwise, the default tracer is used. */ + + pr_debug("%s tracer is used\n", ftrace->tracer); +} + +static int __cmd_ftrace(struct perf_ftrace *ftrace) { char *trace_file; int trace_fd; @@ -586,10 +603,7 @@ static int __cmd_ftrace(struct perf_ftrace *ftrace, int argc, const char **argv) return -1; } - signal(SIGINT, sig_handler); - signal(SIGUSR1, sig_handler); - signal(SIGCHLD, sig_handler); - signal(SIGPIPE, sig_handler); + select_tracer(ftrace); if (reset_tracing_files(ftrace) < 0) { pr_err("failed to reset ftrace\n"); @@ -600,11 +614,6 @@ static int __cmd_ftrace(struct perf_ftrace *ftrace, int argc, const char **argv) if (write_tracing_file("trace", "0") < 0) goto out; - if (argc && evlist__prepare_workload(ftrace->evlist, &ftrace->target, argv, false, - ftrace__workload_exec_failed_signal) < 0) { - goto out; - } - if (set_tracing_options(ftrace) < 0) goto out_reset; @@ -855,23 +864,6 @@ static int parse_graph_tracer_opts(const struct option *opt, return 0; } -static void select_tracer(struct perf_ftrace *ftrace) -{ - bool graph = !list_empty(&ftrace->graph_funcs) || - !list_empty(&ftrace->nograph_funcs); - bool func = !list_empty(&ftrace->filters) || - !list_empty(&ftrace->notrace); - - /* The function_graph has priority over function tracer. */ - if (graph) - ftrace->tracer = "function_graph"; - else if (func) - ftrace->tracer = "function"; - /* Otherwise, the default tracer is used. */ - - pr_debug("%s tracer is used\n", ftrace->tracer); -} - int cmd_ftrace(int argc, const char **argv) { int ret; @@ -937,6 +929,11 @@ int cmd_ftrace(int argc, const char **argv) INIT_LIST_HEAD(&ftrace.graph_funcs); INIT_LIST_HEAD(&ftrace.nograph_funcs); + signal(SIGINT, sig_handler); + signal(SIGUSR1, sig_handler); + signal(SIGCHLD, sig_handler); + signal(SIGPIPE, sig_handler); + ret = perf_config(perf_ftrace_config, &ftrace); if (ret < 0) return -1; @@ -951,8 +948,6 @@ int cmd_ftrace(int argc, const char **argv) if (!argc && target__none(&ftrace.target)) ftrace.target.system_wide = true; - select_tracer(&ftrace); - ret = target__validate(&ftrace.target); if (ret) { char errbuf[512]; @@ -972,7 +967,15 @@ int cmd_ftrace(int argc, const char **argv) if (ret < 0) goto out_delete_evlist; - ret = __cmd_ftrace(&ftrace, argc, argv); + if (argc) { + ret = evlist__prepare_workload(ftrace.evlist, &ftrace.target, + argv, false, + ftrace__workload_exec_failed_signal); + if (ret < 0) + goto out_delete_evlist; + } + + ret = __cmd_ftrace(&ftrace); out_delete_evlist: evlist__delete(ftrace.evlist); From 53be50282269b46c678ae5a9f54acf7416a10dbb Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Wed, 15 Dec 2021 10:51:52 -0800 Subject: [PATCH 080/493] perf ftrace: Add 'latency' subcommand The perf ftrace latency is to get a histogram of function execution time. Users should give a function name using -T option. This is implemented using function_graph tracer with the given function only. And it parses the output to extract the time. $ sudo perf ftrace latency -a -T mutex_lock sleep 1 # DURATION | COUNT | GRAPH | 0 - 1 us | 4596 | ######################## | 1 - 2 us | 1680 | ######### | 2 - 4 us | 1106 | ##### | 4 - 8 us | 546 | ## | 8 - 16 us | 562 | ### | 16 - 32 us | 1 | | 32 - 64 us | 0 | | 64 - 128 us | 0 | | 128 - 256 us | 0 | | 256 - 512 us | 0 | | 512 - 1024 us | 0 | | 1 - 2 ms | 0 | | 2 - 4 ms | 0 | | 4 - 8 ms | 0 | | 8 - 16 ms | 0 | | 16 - 32 ms | 0 | | 32 - 64 ms | 0 | | 64 - 128 ms | 0 | | 128 - 256 ms | 0 | | 256 - 512 ms | 0 | | 512 - 1024 ms | 0 | | 1 - ... s | 0 | | Committer testing: Latency for the __handle_mm_fault kernel function, system wide for 1 second, see how one can go from the usual 'perf ftrace' output, now the same as for the 'perf ftrace trace' subcommand, to the new 'perf ftrace latency' subcommand: # perf ftrace -T __handle_mm_fault -a sleep 1 | wc -l 709 # perf ftrace -T __handle_mm_fault -a sleep 1 | wc -l 510 # perf ftrace -T __handle_mm_fault -a sleep 1 | head -20 # tracer: function # # entries-in-buffer/entries-written: 0/0 #P:32 # # TASK-PID CPU# TIMESTAMP FUNCTION # | | | | | perf-exec-1685104 [007] 90638.894613: __handle_mm_fault <-handle_mm_fault perf-exec-1685104 [007] 90638.894620: __handle_mm_fault <-handle_mm_fault perf-exec-1685104 [007] 90638.894622: __handle_mm_fault <-handle_mm_fault perf-exec-1685104 [007] 90638.894635: __handle_mm_fault <-handle_mm_fault perf-exec-1685104 [007] 90638.894688: __handle_mm_fault <-handle_mm_fault perf-exec-1685104 [007] 90638.894702: __handle_mm_fault <-handle_mm_fault perf-exec-1685104 [007] 90638.894714: __handle_mm_fault <-handle_mm_fault perf-exec-1685104 [007] 90638.894728: __handle_mm_fault <-handle_mm_fault perf-exec-1685104 [007] 90638.894740: __handle_mm_fault <-handle_mm_fault perf-exec-1685104 [007] 90638.894751: __handle_mm_fault <-handle_mm_fault sleep-1685104 [007] 90638.894962: __handle_mm_fault <-handle_mm_fault sleep-1685104 [007] 90638.894977: __handle_mm_fault <-handle_mm_fault sleep-1685104 [007] 90638.894983: __handle_mm_fault <-handle_mm_fault sleep-1685104 [007] 90638.894995: __handle_mm_fault <-handle_mm_fault # perf ftrace latency -T __handle_mm_fault -a sleep 1 # DURATION | COUNT | GRAPH | 0 - 1 us | 125 | ###### | 1 - 2 us | 249 | ############# | 2 - 4 us | 455 | ######################## | 4 - 8 us | 37 | # | 8 - 16 us | 0 | | 16 - 32 us | 0 | | 32 - 64 us | 0 | | 64 - 128 us | 0 | | 128 - 256 us | 0 | | 256 - 512 us | 0 | | 512 - 1024 us | 0 | | 1 - 2 ms | 0 | | 2 - 4 ms | 0 | | 4 - 8 ms | 0 | | 8 - 16 ms | 0 | | 16 - 32 ms | 0 | | 32 - 64 ms | 0 | | 64 - 128 ms | 0 | | 128 - 256 ms | 0 | | 256 - 512 ms | 0 | | 512 - 1024 ms | 0 | | 1 - ... s | 0 | | # Signed-off-by: Namhyung Kim Tested-by: Arnaldo Carvalho de Melo Cc: Andi Kleen Cc: Athira Jajeev Cc: Changbin Du Cc: Ian Rogers Cc: Ingo Molnar Cc: Jiri Olsa Cc: Peter Zijlstra Cc: Song Liu Cc: Stephane Eranian Link: https://lore.kernel.org/r/20211215185154.360314-4-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-ftrace.c | 287 ++++++++++++++++++++++++++++++++++-- 1 file changed, 277 insertions(+), 10 deletions(-) diff --git a/tools/perf/builtin-ftrace.c b/tools/perf/builtin-ftrace.c index 0f8310bd0e6c..8fd3c9c44c69 100644 --- a/tools/perf/builtin-ftrace.c +++ b/tools/perf/builtin-ftrace.c @@ -13,7 +13,9 @@ #include #include #include +#include #include +#include #include #include @@ -702,6 +704,224 @@ out: return (done && !workload_exec_errno) ? 0 : -1; } +#define NUM_BUCKET 22 /* 20 + 2 (for outliers in both direction) */ + +static void make_histogram(int buckets[], char *buf, size_t len, char *linebuf) +{ + char *p, *q; + char *unit; + double num; + int i; + + /* ensure NUL termination */ + buf[len] = '\0'; + + /* handle data line by line */ + for (p = buf; (q = strchr(p, '\n')) != NULL; p = q + 1) { + *q = '\0'; + /* move it to the line buffer */ + strcat(linebuf, p); + + /* + * parse trace output to get function duration like in + * + * # tracer: function_graph + * # + * # CPU DURATION FUNCTION CALLS + * # | | | | | | | + * 1) + 10.291 us | do_filp_open(); + * 1) 4.889 us | do_filp_open(); + * 1) 6.086 us | do_filp_open(); + * + */ + if (linebuf[0] == '#') + goto next; + + /* ignore CPU */ + p = strchr(linebuf, ')'); + if (p == NULL) + p = linebuf; + + while (*p && !isdigit(*p) && (*p != '|')) + p++; + + /* no duration */ + if (*p == '\0' || *p == '|') + goto next; + + num = strtod(p, &unit); + if (!unit || strncmp(unit, " us", 3)) + goto next; + + i = log2(num); + if (i < 0) + i = 0; + if (i >= NUM_BUCKET) + i = NUM_BUCKET - 1; + + buckets[i]++; + +next: + /* empty the line buffer for the next output */ + linebuf[0] = '\0'; + } + + /* preserve any remaining output (before newline) */ + strcat(linebuf, p); +} + +static void display_histogram(int buckets[]) +{ + int i; + int total = 0; + int bar_total = 46; /* to fit in 80 column */ + char bar[] = "###############################################"; + int bar_len; + + for (i = 0; i < NUM_BUCKET; i++) + total += buckets[i]; + + if (total == 0) { + printf("No data found\n"); + return; + } + + printf("# %14s | %10s | %-*s |\n", + " DURATION ", "COUNT", bar_total, "GRAPH"); + + bar_len = buckets[0] * bar_total / total; + printf(" %4d - %-4d %s | %10d | %.*s%*s |\n", + 0, 1, "us", buckets[0], bar_len, bar, bar_total - bar_len, ""); + + for (i = 1; i < NUM_BUCKET - 1; i++) { + int start = (1 << (i - 1)); + int stop = 1 << i; + const char *unit = "us"; + + if (start >= 1024) { + start >>= 10; + stop >>= 10; + unit = "ms"; + } + bar_len = buckets[i] * bar_total / total; + printf(" %4d - %-4d %s | %10d | %.*s%*s |\n", + start, stop, unit, buckets[i], bar_len, bar, + bar_total - bar_len, ""); + } + + bar_len = buckets[NUM_BUCKET - 1] * bar_total / total; + printf(" %4d - %-4s %s | %10d | %.*s%*s |\n", + 1, "...", " s", buckets[NUM_BUCKET - 1], bar_len, bar, + bar_total - bar_len, ""); + +} + +static int __cmd_latency(struct perf_ftrace *ftrace) +{ + char *trace_file; + int trace_fd; + char buf[4096]; + char line[256]; + struct pollfd pollfd = { + .events = POLLIN, + }; + int buckets[NUM_BUCKET] = { }; + + if (!(perf_cap__capable(CAP_PERFMON) || + perf_cap__capable(CAP_SYS_ADMIN))) { + pr_err("ftrace only works for %s!\n", +#ifdef HAVE_LIBCAP_SUPPORT + "users with the CAP_PERFMON or CAP_SYS_ADMIN capability" +#else + "root" +#endif + ); + return -1; + } + + if (reset_tracing_files(ftrace) < 0) { + pr_err("failed to reset ftrace\n"); + goto out; + } + + /* reset ftrace buffer */ + if (write_tracing_file("trace", "0") < 0) + goto out; + + if (set_tracing_options(ftrace) < 0) + goto out_reset; + + /* force to use the function_graph tracer to track duration */ + if (write_tracing_file("current_tracer", "function_graph") < 0) { + pr_err("failed to set current_tracer to function_graph\n"); + goto out_reset; + } + + trace_file = get_tracing_file("trace_pipe"); + if (!trace_file) { + pr_err("failed to open trace_pipe\n"); + goto out_reset; + } + + trace_fd = open(trace_file, O_RDONLY); + + put_tracing_file(trace_file); + + if (trace_fd < 0) { + pr_err("failed to open trace_pipe\n"); + goto out_reset; + } + + fcntl(trace_fd, F_SETFL, O_NONBLOCK); + pollfd.fd = trace_fd; + + if (write_tracing_file("tracing_on", "1") < 0) { + pr_err("can't enable tracing\n"); + goto out_close_fd; + } + + evlist__start_workload(ftrace->evlist); + + line[0] = '\0'; + while (!done) { + if (poll(&pollfd, 1, -1) < 0) + break; + + if (pollfd.revents & POLLIN) { + int n = read(trace_fd, buf, sizeof(buf) - 1); + if (n < 0) + break; + + make_histogram(buckets, buf, n, line); + } + } + + write_tracing_file("tracing_on", "0"); + + if (workload_exec_errno) { + const char *emsg = str_error_r(workload_exec_errno, buf, sizeof(buf)); + pr_err("workload failed: %s\n", emsg); + goto out_close_fd; + } + + /* read remaining buffer contents */ + while (true) { + int n = read(trace_fd, buf, sizeof(buf) - 1); + if (n <= 0) + break; + make_histogram(buckets, buf, n, line); + } + + display_histogram(buckets); + +out_close_fd: + close(trace_fd); +out_reset: + reset_tracing_files(ftrace); +out: + return (done && !workload_exec_errno) ? 0 : -1; +} + static int perf_ftrace_config(const char *var, const char *value, void *cb) { struct perf_ftrace *ftrace = cb; @@ -864,6 +1084,12 @@ static int parse_graph_tracer_opts(const struct option *opt, return 0; } +enum perf_ftrace_subcommand { + PERF_FTRACE_NONE, + PERF_FTRACE_TRACE, + PERF_FTRACE_LATENCY, +}; + int cmd_ftrace(int argc, const char **argv) { int ret; @@ -915,14 +1141,21 @@ int cmd_ftrace(int argc, const char **argv) "Number of milliseconds to wait before starting tracing after program start"), OPT_PARENT(common_options), }; + const struct option latency_options[] = { + OPT_CALLBACK('T', "trace-funcs", &ftrace.filters, "func", + "Show latency of given function", parse_filter_func), + OPT_PARENT(common_options), + }; + const struct option *options = ftrace_options; const char * const ftrace_usage[] = { "perf ftrace [] []", "perf ftrace [] -- [] []", - "perf ftrace trace [] []", - "perf ftrace trace [] -- [] []", + "perf ftrace {trace|latency} [] []", + "perf ftrace {trace|latency} [] -- [] []", NULL }; + enum perf_ftrace_subcommand subcmd = PERF_FTRACE_NONE; INIT_LIST_HEAD(&ftrace.filters); INIT_LIST_HEAD(&ftrace.notrace); @@ -938,15 +1171,29 @@ int cmd_ftrace(int argc, const char **argv) if (ret < 0) return -1; - if (argc > 1 && !strcmp(argv[1], "trace")) { - argc--; - argv++; - } + if (argc > 1) { + if (!strcmp(argv[1], "trace")) { + subcmd = PERF_FTRACE_TRACE; + } else if (!strcmp(argv[1], "latency")) { + subcmd = PERF_FTRACE_LATENCY; + options = latency_options; + } - argc = parse_options(argc, argv, ftrace_options, ftrace_usage, + if (subcmd != PERF_FTRACE_NONE) { + argc--; + argv++; + } + } + /* for backward compatibility */ + if (subcmd == PERF_FTRACE_NONE) + subcmd = PERF_FTRACE_TRACE; + + argc = parse_options(argc, argv, options, ftrace_usage, PARSE_OPT_STOP_AT_NON_OPTION); - if (!argc && target__none(&ftrace.target)) - ftrace.target.system_wide = true; + if (argc < 0) { + ret = -EINVAL; + goto out_delete_filters; + } ret = target__validate(&ftrace.target); if (ret) { @@ -975,7 +1222,27 @@ int cmd_ftrace(int argc, const char **argv) goto out_delete_evlist; } - ret = __cmd_ftrace(&ftrace); + switch (subcmd) { + case PERF_FTRACE_TRACE: + if (!argc && target__none(&ftrace.target)) + ftrace.target.system_wide = true; + ret = __cmd_ftrace(&ftrace); + break; + case PERF_FTRACE_LATENCY: + if (list_empty(&ftrace.filters)) { + pr_err("Should provide a function to measure\n"); + parse_options_usage(ftrace_usage, options, "T", 1); + ret = -EINVAL; + goto out_delete_evlist; + } + ret = __cmd_latency(&ftrace); + break; + case PERF_FTRACE_NONE: + default: + pr_err("Invalid subcommand\n"); + ret = -EINVAL; + break; + } out_delete_evlist: evlist__delete(ftrace.evlist); From 177f4eac7fb7fe5c70fef30dd6c4ef8f81cf7776 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Wed, 15 Dec 2021 10:51:53 -0800 Subject: [PATCH 081/493] perf ftrace: Add -b/--use-bpf option for latency subcommand The -b/--use-bpf option is to use BPF to get latency info of kernel functions. It'd have better performance impact and I observed that latency of same function is smaller than before when using BPF. Committer testing: # strace -e bpf perf ftrace latency -b -T __handle_mm_fault -a sleep 1 bpf(BPF_PROG_LOAD, {prog_type=BPF_PROG_TYPE_SOCKET_FILTER, insn_cnt=2, insns=0x7fff51914e00, license="GPL", log_level=0, log_size=0, log_buf=NULL, kern_version=KERNEL_VERSION(0, 0, 0), prog_flags=0, prog_name="", prog_ifindex=0, expected_attach_type=BPF_CGROUP_INET_INGRESS, prog_btf_fd=0, func_info_rec_size=0, func_info=NULL, func_info_cnt=0, line_info_rec_size=0, line_info=NULL, line_info_cnt=0, attach_btf_id=0, attach_prog_fd=0}, 128) = 3 bpf(BPF_BTF_LOAD, {btf="\237\353\1\0\30\0\0\0\0\0\0\0\20\0\0\0\20\0\0\0\5\0\0\0\1\0\0\0\0\0\0\1"..., btf_log_buf=NULL, btf_size=45, btf_log_size=0, btf_log_level=0}, 128) = 3 bpf(BPF_BTF_LOAD, {btf="\237\353\1\0\30\0\0\0\0\0\0\0000\0\0\0000\0\0\0\t\0\0\0\1\0\0\0\0\0\0\1"..., btf_log_buf=NULL, btf_size=81, btf_log_size=0, btf_log_level=0}, 128) = 3 bpf(BPF_BTF_LOAD, {btf="\237\353\1\0\30\0\0\0\0\0\0\08\0\0\08\0\0\0\t\0\0\0\0\0\0\0\0\0\0\1"..., btf_log_buf=NULL, btf_size=89, btf_log_size=0, btf_log_level=0}, 128) = 3 bpf(BPF_BTF_LOAD, {btf="\237\353\1\0\30\0\0\0\0\0\0\0\f\0\0\0\f\0\0\0\7\0\0\0\1\0\0\0\0\0\0\20"..., btf_log_buf=NULL, btf_size=43, btf_log_size=0, btf_log_level=0}, 128) = 3 bpf(BPF_BTF_LOAD, {btf="\237\353\1\0\30\0\0\0\0\0\0\0000\0\0\0000\0\0\0\t\0\0\0\1\0\0\0\0\0\0\1"..., btf_log_buf=NULL, btf_size=81, btf_log_size=0, btf_log_level=0}, 128) = 3 bpf(BPF_BTF_LOAD, {btf="\237\353\1\0\30\0\0\0\0\0\0\0000\0\0\0000\0\0\0\5\0\0\0\0\0\0\0\0\0\0\1"..., btf_log_buf=NULL, btf_size=77, btf_log_size=0, btf_log_level=0}, 128) = -1 EINVAL (Invalid argument) bpf(BPF_BTF_LOAD, {btf="\237\353\1\0\30\0\0\0\0\0\0\0\350\2\0\0\350\2\0\0\353\2\0\0\0\0\0\0\0\0\0\2"..., btf_log_buf=NULL, btf_size=1515, btf_log_size=0, btf_log_level=0}, 128) = 3 bpf(BPF_MAP_CREATE, {map_type=BPF_MAP_TYPE_ARRAY, key_size=4, value_size=32, max_entries=1, map_flags=0, inner_map_fd=0, map_name="", map_ifindex=0, btf_fd=0, btf_key_type_id=0, btf_value_type_id=0, btf_vmlinux_value_type_id=0}, 128) = 4 bpf(BPF_PROG_LOAD, {prog_type=BPF_PROG_TYPE_SOCKET_FILTER, insn_cnt=5, insns=0x7fff51914c30, license="GPL", log_level=0, log_size=0, log_buf=NULL, kern_version=KERNEL_VERSION(0, 0, 0), prog_flags=0, prog_name="", prog_ifindex=0, expected_attach_type=BPF_CGROUP_INET_INGRESS, prog_btf_fd=0, func_info_rec_size=0, func_info=NULL, func_info_cnt=0, line_info_rec_size=0, line_info=NULL, line_info_cnt=0, attach_btf_id=0, attach_prog_fd=0}, 128) = 5 bpf(BPF_MAP_CREATE, {map_type=BPF_MAP_TYPE_ARRAY, key_size=4, value_size=4, max_entries=1, map_flags=BPF_F_MMAPABLE, inner_map_fd=0, map_name="", map_ifindex=0, btf_fd=0, btf_key_type_id=0, btf_value_type_id=0, btf_vmlinux_value_type_id=0}, 128) = 4 bpf(BPF_PROG_LOAD, {prog_type=BPF_PROG_TYPE_SOCKET_FILTER, insn_cnt=2, insns=0x7fff51914a80, license="GPL", log_level=0, log_size=0, log_buf=NULL, kern_version=KERNEL_VERSION(0, 0, 0), prog_flags=0, prog_name="test", prog_ifindex=0, expected_attach_type=BPF_CGROUP_INET_INGRESS, prog_btf_fd=0, func_info_rec_size=0, func_info=NULL, func_info_cnt=0, line_info_rec_size=0, line_info=NULL, line_info_cnt=0, attach_btf_id=0, attach_prog_fd=0}, 128) = 4 bpf(BPF_MAP_CREATE, {map_type=BPF_MAP_TYPE_HASH, key_size=8, value_size=8, max_entries=10000, map_flags=0, inner_map_fd=0, map_name="functime", map_ifindex=0, btf_fd=3, btf_key_type_id=0, btf_value_type_id=0, btf_vmlinux_value_type_id=0}, 128) = 4 bpf(BPF_MAP_CREATE, {map_type=BPF_MAP_TYPE_HASH, key_size=4, value_size=1, max_entries=1, map_flags=0, inner_map_fd=0, map_name="cpu_filter", map_ifindex=0, btf_fd=3, btf_key_type_id=0, btf_value_type_id=0, btf_vmlinux_value_type_id=0}, 128) = 5 bpf(BPF_MAP_CREATE, {map_type=BPF_MAP_TYPE_HASH, key_size=4, value_size=1, max_entries=1, map_flags=0, inner_map_fd=0, map_name="task_filter", map_ifindex=0, btf_fd=3, btf_key_type_id=0, btf_value_type_id=0, btf_vmlinux_value_type_id=0}, 128) = 7 bpf(BPF_MAP_CREATE, {map_type=BPF_MAP_TYPE_PERCPU_ARRAY, key_size=4, value_size=8, max_entries=22, map_flags=0, inner_map_fd=0, map_name="latency", map_ifindex=0, btf_fd=3, btf_key_type_id=0, btf_value_type_id=0, btf_vmlinux_value_type_id=0}, 128) = 8 bpf(BPF_MAP_CREATE, {map_type=BPF_MAP_TYPE_ARRAY, key_size=4, value_size=4, max_entries=1, map_flags=BPF_F_MMAPABLE, inner_map_fd=0, map_name="func_lat.bss", map_ifindex=0, btf_fd=3, btf_key_type_id=0, btf_value_type_id=30, btf_vmlinux_value_type_id=0}, 128) = 9 bpf(BPF_MAP_UPDATE_ELEM, {map_fd=9, key=0x7fff51914c40, value=0x7f6e99be2000, flags=BPF_ANY}, 128) = 0 bpf(BPF_PROG_LOAD, {prog_type=BPF_PROG_TYPE_KPROBE, insn_cnt=18, insns=0x11e4160, license="", log_level=0, log_size=0, log_buf=NULL, kern_version=KERNEL_VERSION(5, 14, 16), prog_flags=0, prog_name="func_begin", prog_ifindex=0, expected_attach_type=BPF_CGROUP_INET_INGRESS, prog_btf_fd=3, func_info_rec_size=8, func_info=0x11dfc50, func_info_cnt=1, line_info_rec_size=16, line_info=0x11e04c0, line_info_cnt=9, attach_btf_id=0, attach_prog_fd=0}, 128) = 10 bpf(BPF_PROG_LOAD, {prog_type=BPF_PROG_TYPE_KPROBE, insn_cnt=99, insns=0x11ded70, license="", log_level=0, log_size=0, log_buf=NULL, kern_version=KERNEL_VERSION(5, 14, 16), prog_flags=0, prog_name="func_end", prog_ifindex=0, expected_attach_type=BPF_CGROUP_INET_INGRESS, prog_btf_fd=3, func_info_rec_size=8, func_info=0x11dfc70, func_info_cnt=1, line_info_rec_size=16, line_info=0x11f6e10, line_info_cnt=20, attach_btf_id=0, attach_prog_fd=0}, 128) = 11 bpf(BPF_PROG_LOAD, {prog_type=BPF_PROG_TYPE_TRACEPOINT, insn_cnt=2, insns=0x7fff51914a80, license="GPL", log_level=0, log_size=0, log_buf=NULL, kern_version=KERNEL_VERSION(0, 0, 0), prog_flags=0, prog_name="", prog_ifindex=0, expected_attach_type=BPF_CGROUP_INET_INGRESS, prog_btf_fd=0, func_info_rec_size=0, func_info=NULL, func_info_cnt=0, line_info_rec_size=0, line_info=NULL, line_info_cnt=0, attach_btf_id=0, attach_prog_fd=0}, 128) = 13 bpf(BPF_LINK_CREATE, {link_create={prog_fd=13, target_fd=-1, attach_type=0x29 /* BPF_??? */, flags=0}}, 128) = -1 EINVAL (Invalid argument) --- SIGCHLD {si_signo=SIGCHLD, si_code=CLD_EXITED, si_pid=1699992, si_uid=0, si_status=0, si_utime=0, si_stime=0} --- bpf(BPF_MAP_LOOKUP_ELEM, {map_fd=8, key=0x7fff51914f84, value=0x11f6fa0, flags=BPF_ANY}, 128) = 0 bpf(BPF_MAP_LOOKUP_ELEM, {map_fd=8, key=0x7fff51914f84, value=0x11f6fa0, flags=BPF_ANY}, 128) = 0 bpf(BPF_MAP_LOOKUP_ELEM, {map_fd=8, key=0x7fff51914f84, value=0x11f6fa0, flags=BPF_ANY}, 128) = 0 bpf(BPF_MAP_LOOKUP_ELEM, {map_fd=8, key=0x7fff51914f84, value=0x11f6fa0, flags=BPF_ANY}, 128) = 0 bpf(BPF_MAP_LOOKUP_ELEM, {map_fd=8, key=0x7fff51914f84, value=0x11f6fa0, flags=BPF_ANY}, 128) = 0 bpf(BPF_MAP_LOOKUP_ELEM, {map_fd=8, key=0x7fff51914f84, value=0x11f6fa0, flags=BPF_ANY}, 128) = 0 bpf(BPF_MAP_LOOKUP_ELEM, {map_fd=8, key=0x7fff51914f84, value=0x11f6fa0, flags=BPF_ANY}, 128) = 0 bpf(BPF_MAP_LOOKUP_ELEM, {map_fd=8, key=0x7fff51914f84, value=0x11f6fa0, flags=BPF_ANY}, 128) = 0 bpf(BPF_MAP_LOOKUP_ELEM, {map_fd=8, key=0x7fff51914f84, value=0x11f6fa0, flags=BPF_ANY}, 128) = 0 bpf(BPF_MAP_LOOKUP_ELEM, {map_fd=8, key=0x7fff51914f84, value=0x11f6fa0, flags=BPF_ANY}, 128) = 0 bpf(BPF_MAP_LOOKUP_ELEM, {map_fd=8, key=0x7fff51914f84, value=0x11f6fa0, flags=BPF_ANY}, 128) = 0 bpf(BPF_MAP_LOOKUP_ELEM, {map_fd=8, key=0x7fff51914f84, value=0x11f6fa0, flags=BPF_ANY}, 128) = 0 bpf(BPF_MAP_LOOKUP_ELEM, {map_fd=8, key=0x7fff51914f84, value=0x11f6fa0, flags=BPF_ANY}, 128) = 0 bpf(BPF_MAP_LOOKUP_ELEM, {map_fd=8, key=0x7fff51914f84, value=0x11f6fa0, flags=BPF_ANY}, 128) = 0 bpf(BPF_MAP_LOOKUP_ELEM, {map_fd=8, key=0x7fff51914f84, value=0x11f6fa0, flags=BPF_ANY}, 128) = 0 bpf(BPF_MAP_LOOKUP_ELEM, {map_fd=8, key=0x7fff51914f84, value=0x11f6fa0, flags=BPF_ANY}, 128) = 0 bpf(BPF_MAP_LOOKUP_ELEM, {map_fd=8, key=0x7fff51914f84, value=0x11f6fa0, flags=BPF_ANY}, 128) = 0 bpf(BPF_MAP_LOOKUP_ELEM, {map_fd=8, key=0x7fff51914f84, value=0x11f6fa0, flags=BPF_ANY}, 128) = 0 bpf(BPF_MAP_LOOKUP_ELEM, {map_fd=8, key=0x7fff51914f84, value=0x11f6fa0, flags=BPF_ANY}, 128) = 0 bpf(BPF_MAP_LOOKUP_ELEM, {map_fd=8, key=0x7fff51914f84, value=0x11f6fa0, flags=BPF_ANY}, 128) = 0 bpf(BPF_MAP_LOOKUP_ELEM, {map_fd=8, key=0x7fff51914f84, value=0x11f6fa0, flags=BPF_ANY}, 128) = 0 bpf(BPF_MAP_LOOKUP_ELEM, {map_fd=8, key=0x7fff51914f84, value=0x11f6fa0, flags=BPF_ANY}, 128) = 0 # DURATION | COUNT | GRAPH | 0 - 1 us | 52 | ################### | 1 - 2 us | 36 | ############# | 2 - 4 us | 24 | ######### | 4 - 8 us | 7 | ## | 8 - 16 us | 1 | | 16 - 32 us | 0 | | 32 - 64 us | 0 | | 64 - 128 us | 0 | | 128 - 256 us | 0 | | 256 - 512 us | 0 | | 512 - 1024 us | 0 | | 1 - 2 ms | 0 | | 2 - 4 ms | 0 | | 4 - 8 ms | 0 | | 8 - 16 ms | 0 | | 16 - 32 ms | 0 | | 32 - 64 ms | 0 | | 64 - 128 ms | 0 | | 128 - 256 ms | 0 | | 256 - 512 ms | 0 | | 512 - 1024 ms | 0 | | 1 - ... s | 0 | | +++ exited with 0 +++ # Signed-off-by: Namhyung Kim Tested-by: Arnaldo Carvalho de Melo Cc: Andi Kleen Cc: Athira Jajeev Cc: Changbin Du Cc: Ian Rogers Cc: Ingo Molnar Cc: Jiri Olsa Cc: Peter Zijlstra Cc: Song Liu Cc: Stephane Eranian Link: https://lore.kernel.org/r/20211215185154.360314-5-namhyung@kernel.org [ Add missing util/cpumap.h include and removed unused 'fd' variable ] Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Makefile.perf | 2 +- tools/perf/builtin-ftrace.c | 166 +++++++++++--------- tools/perf/util/Build | 1 + tools/perf/util/bpf_ftrace.c | 112 +++++++++++++ tools/perf/util/bpf_skel/func_latency.bpf.c | 93 +++++++++++ tools/perf/util/ftrace.h | 81 ++++++++++ 6 files changed, 384 insertions(+), 71 deletions(-) create mode 100644 tools/perf/util/bpf_ftrace.c create mode 100644 tools/perf/util/bpf_skel/func_latency.bpf.c create mode 100644 tools/perf/util/ftrace.h diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index 164a37523781..ac861e42c8f7 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ -1041,7 +1041,7 @@ SKEL_OUT := $(abspath $(OUTPUT)util/bpf_skel) SKEL_TMP_OUT := $(abspath $(SKEL_OUT)/.tmp) SKELETONS := $(SKEL_OUT)/bpf_prog_profiler.skel.h SKELETONS += $(SKEL_OUT)/bperf_leader.skel.h $(SKEL_OUT)/bperf_follower.skel.h -SKELETONS += $(SKEL_OUT)/bperf_cgroup.skel.h +SKELETONS += $(SKEL_OUT)/bperf_cgroup.skel.h $(SKEL_OUT)/func_latency.skel.h $(SKEL_TMP_OUT) $(LIBBPF_OUTPUT): $(Q)$(MKDIR) -p $@ diff --git a/tools/perf/builtin-ftrace.c b/tools/perf/builtin-ftrace.c index 8fd3c9c44c69..2b54e2ddc80a 100644 --- a/tools/perf/builtin-ftrace.c +++ b/tools/perf/builtin-ftrace.c @@ -30,36 +30,12 @@ #include "strfilter.h" #include "util/cap.h" #include "util/config.h" +#include "util/ftrace.h" #include "util/units.h" #include "util/parse-sublevel-options.h" #define DEFAULT_TRACER "function_graph" -struct perf_ftrace { - struct evlist *evlist; - struct target target; - const char *tracer; - struct list_head filters; - struct list_head notrace; - struct list_head graph_funcs; - struct list_head nograph_funcs; - int graph_depth; - unsigned long percpu_buffer_size; - bool inherit; - int func_stack_trace; - int func_irq_info; - int graph_nosleep_time; - int graph_noirqs; - int graph_verbose; - int graph_thresh; - unsigned int initial_delay; -}; - -struct filter_entry { - struct list_head list; - char name[]; -}; - static volatile int workload_exec_errno; static bool done; @@ -704,8 +680,6 @@ out: return (done && !workload_exec_errno) ? 0 : -1; } -#define NUM_BUCKET 22 /* 20 + 2 (for outliers in both direction) */ - static void make_histogram(int buckets[], char *buf, size_t len, char *linebuf) { char *p, *q; @@ -816,9 +790,87 @@ static void display_histogram(int buckets[]) } -static int __cmd_latency(struct perf_ftrace *ftrace) +static int prepare_func_latency(struct perf_ftrace *ftrace) { char *trace_file; + int fd; + + if (ftrace->target.use_bpf) + return perf_ftrace__latency_prepare_bpf(ftrace); + + if (reset_tracing_files(ftrace) < 0) { + pr_err("failed to reset ftrace\n"); + return -1; + } + + /* reset ftrace buffer */ + if (write_tracing_file("trace", "0") < 0) + return -1; + + if (set_tracing_options(ftrace) < 0) + return -1; + + /* force to use the function_graph tracer to track duration */ + if (write_tracing_file("current_tracer", "function_graph") < 0) { + pr_err("failed to set current_tracer to function_graph\n"); + return -1; + } + + trace_file = get_tracing_file("trace_pipe"); + if (!trace_file) { + pr_err("failed to open trace_pipe\n"); + return -1; + } + + fd = open(trace_file, O_RDONLY); + if (fd < 0) + pr_err("failed to open trace_pipe\n"); + + put_tracing_file(trace_file); + return fd; +} + +static int start_func_latency(struct perf_ftrace *ftrace) +{ + if (ftrace->target.use_bpf) + return perf_ftrace__latency_start_bpf(ftrace); + + if (write_tracing_file("tracing_on", "1") < 0) { + pr_err("can't enable tracing\n"); + return -1; + } + + return 0; +} + +static int stop_func_latency(struct perf_ftrace *ftrace) +{ + if (ftrace->target.use_bpf) + return perf_ftrace__latency_stop_bpf(ftrace); + + write_tracing_file("tracing_on", "0"); + return 0; +} + +static int read_func_latency(struct perf_ftrace *ftrace, int buckets[]) +{ + if (ftrace->target.use_bpf) + return perf_ftrace__latency_read_bpf(ftrace, buckets); + + return 0; +} + +static int cleanup_func_latency(struct perf_ftrace *ftrace) +{ + if (ftrace->target.use_bpf) + return perf_ftrace__latency_cleanup_bpf(ftrace); + + reset_tracing_files(ftrace); + return 0; +} + +static int __cmd_latency(struct perf_ftrace *ftrace) +{ int trace_fd; char buf[4096]; char line[256]; @@ -839,46 +891,15 @@ static int __cmd_latency(struct perf_ftrace *ftrace) return -1; } - if (reset_tracing_files(ftrace) < 0) { - pr_err("failed to reset ftrace\n"); + trace_fd = prepare_func_latency(ftrace); + if (trace_fd < 0) goto out; - } - - /* reset ftrace buffer */ - if (write_tracing_file("trace", "0") < 0) - goto out; - - if (set_tracing_options(ftrace) < 0) - goto out_reset; - - /* force to use the function_graph tracer to track duration */ - if (write_tracing_file("current_tracer", "function_graph") < 0) { - pr_err("failed to set current_tracer to function_graph\n"); - goto out_reset; - } - - trace_file = get_tracing_file("trace_pipe"); - if (!trace_file) { - pr_err("failed to open trace_pipe\n"); - goto out_reset; - } - - trace_fd = open(trace_file, O_RDONLY); - - put_tracing_file(trace_file); - - if (trace_fd < 0) { - pr_err("failed to open trace_pipe\n"); - goto out_reset; - } fcntl(trace_fd, F_SETFL, O_NONBLOCK); pollfd.fd = trace_fd; - if (write_tracing_file("tracing_on", "1") < 0) { - pr_err("can't enable tracing\n"); - goto out_close_fd; - } + if (start_func_latency(ftrace) < 0) + goto out; evlist__start_workload(ftrace->evlist); @@ -896,29 +917,30 @@ static int __cmd_latency(struct perf_ftrace *ftrace) } } - write_tracing_file("tracing_on", "0"); + stop_func_latency(ftrace); if (workload_exec_errno) { const char *emsg = str_error_r(workload_exec_errno, buf, sizeof(buf)); pr_err("workload failed: %s\n", emsg); - goto out_close_fd; + goto out; } /* read remaining buffer contents */ - while (true) { + while (!ftrace->target.use_bpf) { int n = read(trace_fd, buf, sizeof(buf) - 1); if (n <= 0) break; make_histogram(buckets, buf, n, line); } + read_func_latency(ftrace, buckets); + display_histogram(buckets); -out_close_fd: - close(trace_fd); -out_reset: - reset_tracing_files(ftrace); out: + close(trace_fd); + cleanup_func_latency(ftrace); + return (done && !workload_exec_errno) ? 0 : -1; } @@ -1144,6 +1166,10 @@ int cmd_ftrace(int argc, const char **argv) const struct option latency_options[] = { OPT_CALLBACK('T', "trace-funcs", &ftrace.filters, "func", "Show latency of given function", parse_filter_func), +#ifdef HAVE_BPF_SKEL + OPT_BOOLEAN('b', "use-bpf", &ftrace.target.use_bpf, + "Use BPF to measure function latency"), +#endif OPT_PARENT(common_options), }; const struct option *options = ftrace_options; diff --git a/tools/perf/util/Build b/tools/perf/util/Build index 2e5bfbb69960..294b12430d73 100644 --- a/tools/perf/util/Build +++ b/tools/perf/util/Build @@ -144,6 +144,7 @@ perf-$(CONFIG_LIBBPF) += bpf-loader.o perf-$(CONFIG_LIBBPF) += bpf_map.o perf-$(CONFIG_PERF_BPF_SKEL) += bpf_counter.o perf-$(CONFIG_PERF_BPF_SKEL) += bpf_counter_cgroup.o +perf-$(CONFIG_PERF_BPF_SKEL) += bpf_ftrace.o perf-$(CONFIG_BPF_PROLOGUE) += bpf-prologue.o perf-$(CONFIG_LIBELF) += symbol-elf.o perf-$(CONFIG_LIBELF) += probe-file.o diff --git a/tools/perf/util/bpf_ftrace.c b/tools/perf/util/bpf_ftrace.c new file mode 100644 index 000000000000..ec4e2f5a2fc4 --- /dev/null +++ b/tools/perf/util/bpf_ftrace.c @@ -0,0 +1,112 @@ +#include +#include +#include +#include + +#include + +#include "util/ftrace.h" +#include "util/cpumap.h" +#include "util/debug.h" +#include "util/bpf_counter.h" + +#include "util/bpf_skel/func_latency.skel.h" + +static struct func_latency_bpf *skel; + +int perf_ftrace__latency_prepare_bpf(struct perf_ftrace *ftrace) +{ + int err; + struct filter_entry *func; + + if (!list_is_singular(&ftrace->filters)) { + pr_err("ERROR: %s target function(s).\n", + list_empty(&ftrace->filters) ? "No" : "Too many"); + return -1; + } + + func = list_first_entry(&ftrace->filters, struct filter_entry, list); + + skel = func_latency_bpf__open(); + if (!skel) { + pr_err("Failed to open func latency skeleton\n"); + return -1; + } + + set_max_rlimit(); + + err = func_latency_bpf__load(skel); + if (err) { + pr_err("Failed to load func latency skeleton\n"); + goto out; + } + + skel->links.func_begin = bpf_program__attach_kprobe(skel->progs.func_begin, + false, func->name); + if (IS_ERR(skel->links.func_begin)) { + pr_err("Failed to attach fentry program\n"); + err = PTR_ERR(skel->links.func_begin); + goto out; + } + + skel->links.func_end = bpf_program__attach_kprobe(skel->progs.func_end, + true, func->name); + if (IS_ERR(skel->links.func_end)) { + pr_err("Failed to attach fexit program\n"); + err = PTR_ERR(skel->links.func_end); + goto out; + } + + /* XXX: we don't actually use this fd - just for poll() */ + return open("/dev/null", O_RDONLY); + +out: + return err; +} + +int perf_ftrace__latency_start_bpf(struct perf_ftrace *ftrace __maybe_unused) +{ + skel->bss->enabled = 1; + return 0; +} + +int perf_ftrace__latency_stop_bpf(struct perf_ftrace *ftrace __maybe_unused) +{ + skel->bss->enabled = 0; + return 0; +} + +int perf_ftrace__latency_read_bpf(struct perf_ftrace *ftrace __maybe_unused, + int buckets[]) +{ + int i, fd, err; + u32 idx; + u64 *hist; + int ncpus = cpu__max_cpu(); + + fd = bpf_map__fd(skel->maps.latency); + + hist = calloc(ncpus, sizeof(*hist)); + if (hist == NULL) + return -ENOMEM; + + for (idx = 0; idx < NUM_BUCKET; idx++) { + err = bpf_map_lookup_elem(fd, &idx, hist); + if (err) { + buckets[idx] = 0; + continue; + } + + for (i = 0; i < ncpus; i++) + buckets[idx] += hist[i]; + } + + free(hist); + return 0; +} + +int perf_ftrace__latency_cleanup_bpf(struct perf_ftrace *ftrace __maybe_unused) +{ + func_latency_bpf__destroy(skel); + return 0; +} diff --git a/tools/perf/util/bpf_skel/func_latency.bpf.c b/tools/perf/util/bpf_skel/func_latency.bpf.c new file mode 100644 index 000000000000..ccd96b09fc42 --- /dev/null +++ b/tools/perf/util/bpf_skel/func_latency.bpf.c @@ -0,0 +1,93 @@ +// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +// Copyright (c) 2021 Google +#include "vmlinux.h" +#include +#include + +// This should be in sync with "util/ftrace.h" +#define NUM_BUCKET 22 + +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __uint(key_size, sizeof(__u64)); + __uint(value_size, sizeof(__u64)); + __uint(max_entries, 10000); +} functime SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __uint(key_size, sizeof(__u32)); + __uint(value_size, sizeof(__u8)); + __uint(max_entries, 1); +} cpu_filter SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __uint(key_size, sizeof(__u32)); + __uint(value_size, sizeof(__u8)); + __uint(max_entries, 1); +} task_filter SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY); + __uint(key_size, sizeof(__u32)); + __uint(value_size, sizeof(__u64)); + __uint(max_entries, NUM_BUCKET); +} latency SEC(".maps"); + + +int enabled = 0; + +SEC("kprobe/func") +int BPF_PROG(func_begin) +{ + __u64 key, now; + + if (!enabled) + return 0; + + key = bpf_get_current_pid_tgid(); + now = bpf_ktime_get_ns(); + + // overwrite timestamp for nested functions + bpf_map_update_elem(&functime, &key, &now, BPF_ANY); + return 0; +} + +SEC("kretprobe/func") +int BPF_PROG(func_end) +{ + __u64 tid; + __u64 *start; + + if (!enabled) + return 0; + + tid = bpf_get_current_pid_tgid(); + + start = bpf_map_lookup_elem(&functime, &tid); + if (start) { + __s64 delta = bpf_ktime_get_ns() - *start; + __u32 key; + __u64 *hist; + + bpf_map_delete_elem(&functime, &tid); + + if (delta < 0) + return 0; + + // calculate index using delta in usec + for (key = 0; key < (NUM_BUCKET - 1); key++) { + if (delta < ((1000UL) << key)) + break; + } + + hist = bpf_map_lookup_elem(&latency, &key); + if (!hist) + return 0; + + *hist += 1; + } + + return 0; +} diff --git a/tools/perf/util/ftrace.h b/tools/perf/util/ftrace.h new file mode 100644 index 000000000000..887f68a185f7 --- /dev/null +++ b/tools/perf/util/ftrace.h @@ -0,0 +1,81 @@ +#ifndef __PERF_FTRACE_H__ +#define __PERF_FTRACE_H__ + +#include + +#include "target.h" + +struct evlist; + +struct perf_ftrace { + struct evlist *evlist; + struct target target; + const char *tracer; + struct list_head filters; + struct list_head notrace; + struct list_head graph_funcs; + struct list_head nograph_funcs; + unsigned long percpu_buffer_size; + bool inherit; + int graph_depth; + int func_stack_trace; + int func_irq_info; + int graph_nosleep_time; + int graph_noirqs; + int graph_verbose; + int graph_thresh; + unsigned int initial_delay; +}; + +struct filter_entry { + struct list_head list; + char name[]; +}; + +#define NUM_BUCKET 22 /* 20 + 2 (for outliers in both direction) */ + +#ifdef HAVE_BPF_SKEL + +int perf_ftrace__latency_prepare_bpf(struct perf_ftrace *ftrace); +int perf_ftrace__latency_start_bpf(struct perf_ftrace *ftrace); +int perf_ftrace__latency_stop_bpf(struct perf_ftrace *ftrace); +int perf_ftrace__latency_read_bpf(struct perf_ftrace *ftrace, + int buckets[]); +int perf_ftrace__latency_cleanup_bpf(struct perf_ftrace *ftrace); + +#else /* !HAVE_BPF_SKEL */ + +static inline int +perf_ftrace__latency_prepare_bpf(struct perf_ftrace *ftrace __maybe_unused) +{ + return -1; +} + +static inline int +perf_ftrace__latency_start_bpf(struct perf_ftrace *ftrace __maybe_unused) +{ + return -1; +} + +static inline int +perf_ftrace__latency_stop_bpf(struct perf_ftrace *ftrace __maybe_unused) +{ + return -1; +} + +static inline int +perf_ftrace__latency_read_bpf(struct perf_ftrace *ftrace __maybe_unused, + int buckets[] __maybe_unused) +{ + return -1; +} + +static inline int +perf_ftrace__latency_cleanup_bpf(struct perf_ftrace *ftrace __maybe_unused) +{ + return -1; +} + +#endif /* HAVE_BPF_SKEL */ + +#endif /* __PERF_FTRACE_H__ */ From 9c5c605219578b8167b649245e00d6407f2c18da Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Wed, 15 Dec 2021 10:51:54 -0800 Subject: [PATCH 082/493] perf ftrace: Implement cpu and task filters in BPF Honor cpu and task options to set up filters (by pid or tid) in the BPF program. For example, the following command will show latency of the mutex_lock for process 2570. # perf ftrace latency -b -T mutex_lock -p 2570 sleep 3 # DURATION | COUNT | GRAPH | 0 - 1 us | 675 | ############################## | 1 - 2 us | 9 | | 2 - 4 us | 0 | | 4 - 8 us | 0 | | 8 - 16 us | 0 | | 16 - 32 us | 0 | | 32 - 64 us | 0 | | 64 - 128 us | 0 | | 128 - 256 us | 0 | | 256 - 512 us | 0 | | 512 - 1024 us | 0 | | 1 - 2 ms | 0 | | 2 - 4 ms | 0 | | 4 - 8 ms | 0 | | 8 - 16 ms | 0 | | 16 - 32 ms | 0 | | 32 - 64 ms | 0 | | 64 - 128 ms | 0 | | 128 - 256 ms | 0 | | 256 - 512 ms | 0 | | 512 - 1024 ms | 0 | | 1 - ... s | 0 | | Committer testing: Looking at faults on a firefox process: # strace -e bpf perf ftrace latency -b -p 1674378 -T __handle_mm_fault bpf(BPF_PROG_LOAD, {prog_type=BPF_PROG_TYPE_SOCKET_FILTER, insn_cnt=2, insns=0x7ffee1fee740, license="GPL", log_level=0, log_size=0, log_buf=NULL, kern_version=KERNEL_VERSION(0, 0, 0), prog_flags=0, prog_name="", prog_ifindex=0, expected_attach_type=BPF_CGROUP_INET_INGRESS, prog_btf_fd=0, func_info_rec_size=0, func_info=NULL, func_info_cnt=0, line_info_rec_size=0, line_info=NULL, line_info_cnt=0, attach_btf_id=0, attach_prog_fd=0}, 128) = 3 bpf(BPF_BTF_LOAD, {btf="\237\353\1\0\30\0\0\0\0\0\0\0\20\0\0\0\20\0\0\0\5\0\0\0\1\0\0\0\0\0\0\1"..., btf_log_buf=NULL, btf_size=45, btf_log_size=0, btf_log_level=0}, 128) = 3 bpf(BPF_BTF_LOAD, {btf="\237\353\1\0\30\0\0\0\0\0\0\0000\0\0\0000\0\0\0\t\0\0\0\1\0\0\0\0\0\0\1"..., btf_log_buf=NULL, btf_size=81, btf_log_size=0, btf_log_level=0}, 128) = 3 bpf(BPF_BTF_LOAD, {btf="\237\353\1\0\30\0\0\0\0\0\0\08\0\0\08\0\0\0\t\0\0\0\0\0\0\0\0\0\0\1"..., btf_log_buf=NULL, btf_size=89, btf_log_size=0, btf_log_level=0}, 128) = 3 bpf(BPF_BTF_LOAD, {btf="\237\353\1\0\30\0\0\0\0\0\0\0\f\0\0\0\f\0\0\0\7\0\0\0\1\0\0\0\0\0\0\20"..., btf_log_buf=NULL, btf_size=43, btf_log_size=0, btf_log_level=0}, 128) = 3 bpf(BPF_BTF_LOAD, {btf="\237\353\1\0\30\0\0\0\0\0\0\0000\0\0\0000\0\0\0\t\0\0\0\1\0\0\0\0\0\0\1"..., btf_log_buf=NULL, btf_size=81, btf_log_size=0, btf_log_level=0}, 128) = 3 bpf(BPF_BTF_LOAD, {btf="\237\353\1\0\30\0\0\0\0\0\0\0000\0\0\0000\0\0\0\5\0\0\0\0\0\0\0\0\0\0\1"..., btf_log_buf=NULL, btf_size=77, btf_log_size=0, btf_log_level=0}, 128) = -1 EINVAL (Invalid argument) bpf(BPF_BTF_LOAD, {btf="\237\353\1\0\30\0\0\0\0\0\0\0 \3\0\0 \3\0\0\306\3\0\0\0\0\0\0\0\0\0\2"..., btf_log_buf=NULL, btf_size=1790, btf_log_size=0, btf_log_level=0}, 128) = 3 bpf(BPF_MAP_CREATE, {map_type=BPF_MAP_TYPE_ARRAY, key_size=4, value_size=32, max_entries=1, map_flags=0, inner_map_fd=0, map_name="", map_ifindex=0, btf_fd=0, btf_key_type_id=0, btf_value_type_id=0, btf_vmlinux_value_type_id=0}, 128) = 4 bpf(BPF_PROG_LOAD, {prog_type=BPF_PROG_TYPE_SOCKET_FILTER, insn_cnt=5, insns=0x7ffee1fee570, license="GPL", log_level=0, log_size=0, log_buf=NULL, kern_version=KERNEL_VERSION(0, 0, 0), prog_flags=0, prog_name="", prog_ifindex=0, expected_attach_type=BPF_CGROUP_INET_INGRESS, prog_btf_fd=0, func_info_rec_size=0, func_info=NULL, func_info_cnt=0, line_info_rec_size=0, line_info=NULL, line_info_cnt=0, attach_btf_id=0, attach_prog_fd=0}, 128) = 5 bpf(BPF_MAP_CREATE, {map_type=BPF_MAP_TYPE_ARRAY, key_size=4, value_size=4, max_entries=1, map_flags=BPF_F_MMAPABLE, inner_map_fd=0, map_name="", map_ifindex=0, btf_fd=0, btf_key_type_id=0, btf_value_type_id=0, btf_vmlinux_value_type_id=0}, 128) = 4 bpf(BPF_PROG_LOAD, {prog_type=BPF_PROG_TYPE_SOCKET_FILTER, insn_cnt=2, insns=0x7ffee1fee3c0, license="GPL", log_level=0, log_size=0, log_buf=NULL, kern_version=KERNEL_VERSION(0, 0, 0), prog_flags=0, prog_name="test", prog_ifindex=0, expected_attach_type=BPF_CGROUP_INET_INGRESS, prog_btf_fd=0, func_info_rec_size=0, func_info=NULL, func_info_cnt=0, line_info_rec_size=0, line_info=NULL, line_info_cnt=0, attach_btf_id=0, attach_prog_fd=0}, 128) = 4 bpf(BPF_MAP_CREATE, {map_type=BPF_MAP_TYPE_HASH, key_size=8, value_size=8, max_entries=10000, map_flags=0, inner_map_fd=0, map_name="functime", map_ifindex=0, btf_fd=3, btf_key_type_id=0, btf_value_type_id=0, btf_vmlinux_value_type_id=0}, 128) = 4 bpf(BPF_MAP_CREATE, {map_type=BPF_MAP_TYPE_HASH, key_size=4, value_size=1, max_entries=1, map_flags=0, inner_map_fd=0, map_name="cpu_filter", map_ifindex=0, btf_fd=3, btf_key_type_id=0, btf_value_type_id=0, btf_vmlinux_value_type_id=0}, 128) = 5 bpf(BPF_MAP_CREATE, {map_type=BPF_MAP_TYPE_HASH, key_size=4, value_size=1, max_entries=36, map_flags=0, inner_map_fd=0, map_name="task_filter", map_ifindex=0, btf_fd=3, btf_key_type_id=0, btf_value_type_id=0, btf_vmlinux_value_type_id=0}, 128) = 6 bpf(BPF_MAP_CREATE, {map_type=BPF_MAP_TYPE_PERCPU_ARRAY, key_size=4, value_size=8, max_entries=22, map_flags=0, inner_map_fd=0, map_name="latency", map_ifindex=0, btf_fd=3, btf_key_type_id=0, btf_value_type_id=0, btf_vmlinux_value_type_id=0}, 128) = 7 bpf(BPF_MAP_CREATE, {map_type=BPF_MAP_TYPE_ARRAY, key_size=4, value_size=12, max_entries=1, map_flags=BPF_F_MMAPABLE, inner_map_fd=0, map_name="func_lat.bss", map_ifindex=0, btf_fd=3, btf_key_type_id=0, btf_value_type_id=32, btf_vmlinux_value_type_id=0}, 128) = 8 bpf(BPF_MAP_UPDATE_ELEM, {map_fd=8, key=0x7ffee1fee580, value=0x7f01d940a000, flags=BPF_ANY}, 128) = 0 bpf(BPF_PROG_LOAD, {prog_type=BPF_PROG_TYPE_KPROBE, insn_cnt=42, insns=0x1871f30, license="", log_level=0, log_size=0, log_buf=NULL, kern_version=KERNEL_VERSION(5, 14, 16), prog_flags=0, prog_name="func_begin", prog_ifindex=0, expected_attach_type=BPF_CGROUP_INET_INGRESS, prog_btf_fd=3, func_info_rec_size=8, func_info=0x18746a0, func_info_cnt=1, line_info_rec_size=16, line_info=0x1874550, line_info_cnt=20, attach_btf_id=0, attach_prog_fd=0}, 128) = 9 bpf(BPF_PROG_LOAD, {prog_type=BPF_PROG_TYPE_KPROBE, insn_cnt=99, insns=0x18769b0, license="", log_level=0, log_size=0, log_buf=NULL, kern_version=KERNEL_VERSION(5, 14, 16), prog_flags=0, prog_name="func_end", prog_ifindex=0, expected_attach_type=BPF_CGROUP_INET_INGRESS, prog_btf_fd=3, func_info_rec_size=8, func_info=0x188a640, func_info_cnt=1, line_info_rec_size=16, line_info=0x188a660, line_info_cnt=20, attach_btf_id=0, attach_prog_fd=0}, 128) = 10 bpf(BPF_MAP_UPDATE_ELEM, {map_fd=6, key=0x7ffee1fee8e0, value=0x7ffee1fee8df, flags=BPF_ANY}, 128) = 0 bpf(BPF_MAP_UPDATE_ELEM, {map_fd=6, key=0x7ffee1fee8e0, value=0x7ffee1fee8df, flags=BPF_ANY}, 128) = 0 bpf(BPF_MAP_UPDATE_ELEM, {map_fd=6, key=0x7ffee1fee8e0, value=0x7ffee1fee8df, flags=BPF_ANY}, 128) = 0 bpf(BPF_MAP_UPDATE_ELEM, {map_fd=6, key=0x7ffee1fee8e0, value=0x7ffee1fee8df, flags=BPF_ANY}, 128) = 0 bpf(BPF_MAP_UPDATE_ELEM, {map_fd=6, key=0x7ffee1fee8e0, value=0x7ffee1fee8df, flags=BPF_ANY}, 128) = 0 bpf(BPF_MAP_UPDATE_ELEM, {map_fd=6, key=0x7ffee1fee8e0, value=0x7ffee1fee8df, flags=BPF_ANY}, 128) = 0 bpf(BPF_MAP_UPDATE_ELEM, {map_fd=6, key=0x7ffee1fee8e0, value=0x7ffee1fee8df, flags=BPF_ANY}, 128) = 0 bpf(BPF_MAP_UPDATE_ELEM, {map_fd=6, key=0x7ffee1fee8e0, value=0x7ffee1fee8df, flags=BPF_ANY}, 128) = 0 bpf(BPF_MAP_UPDATE_ELEM, {map_fd=6, key=0x7ffee1fee8e0, value=0x7ffee1fee8df, flags=BPF_ANY}, 128) = 0 bpf(BPF_MAP_UPDATE_ELEM, {map_fd=6, key=0x7ffee1fee8e0, value=0x7ffee1fee8df, flags=BPF_ANY}, 128) = 0 bpf(BPF_MAP_UPDATE_ELEM, {map_fd=6, key=0x7ffee1fee8e0, value=0x7ffee1fee8df, flags=BPF_ANY}, 128) = 0 bpf(BPF_MAP_UPDATE_ELEM, {map_fd=6, key=0x7ffee1fee8e0, value=0x7ffee1fee8df, flags=BPF_ANY}, 128) = 0 bpf(BPF_MAP_UPDATE_ELEM, {map_fd=6, key=0x7ffee1fee8e0, value=0x7ffee1fee8df, flags=BPF_ANY}, 128) = 0 bpf(BPF_MAP_UPDATE_ELEM, {map_fd=6, key=0x7ffee1fee8e0, value=0x7ffee1fee8df, flags=BPF_ANY}, 128) = 0 bpf(BPF_MAP_UPDATE_ELEM, {map_fd=6, key=0x7ffee1fee8e0, value=0x7ffee1fee8df, flags=BPF_ANY}, 128) = 0 bpf(BPF_MAP_UPDATE_ELEM, {map_fd=6, key=0x7ffee1fee8e0, value=0x7ffee1fee8df, flags=BPF_ANY}, 128) = 0 bpf(BPF_MAP_UPDATE_ELEM, {map_fd=6, key=0x7ffee1fee8e0, value=0x7ffee1fee8df, flags=BPF_ANY}, 128) = 0 bpf(BPF_MAP_UPDATE_ELEM, {map_fd=6, key=0x7ffee1fee8e0, value=0x7ffee1fee8df, flags=BPF_ANY}, 128) = 0 bpf(BPF_MAP_UPDATE_ELEM, {map_fd=6, key=0x7ffee1fee8e0, value=0x7ffee1fee8df, flags=BPF_ANY}, 128) = 0 bpf(BPF_MAP_UPDATE_ELEM, {map_fd=6, key=0x7ffee1fee8e0, value=0x7ffee1fee8df, flags=BPF_ANY}, 128) = 0 bpf(BPF_MAP_UPDATE_ELEM, {map_fd=6, key=0x7ffee1fee8e0, value=0x7ffee1fee8df, flags=BPF_ANY}, 128) = 0 bpf(BPF_MAP_UPDATE_ELEM, {map_fd=6, key=0x7ffee1fee8e0, value=0x7ffee1fee8df, flags=BPF_ANY}, 128) = 0 bpf(BPF_MAP_UPDATE_ELEM, {map_fd=6, key=0x7ffee1fee8e0, value=0x7ffee1fee8df, flags=BPF_ANY}, 128) = 0 bpf(BPF_MAP_UPDATE_ELEM, {map_fd=6, key=0x7ffee1fee8e0, value=0x7ffee1fee8df, flags=BPF_ANY}, 128) = 0 bpf(BPF_MAP_UPDATE_ELEM, {map_fd=6, key=0x7ffee1fee8e0, value=0x7ffee1fee8df, flags=BPF_ANY}, 128) = 0 bpf(BPF_MAP_UPDATE_ELEM, {map_fd=6, key=0x7ffee1fee8e0, value=0x7ffee1fee8df, flags=BPF_ANY}, 128) = 0 bpf(BPF_MAP_UPDATE_ELEM, {map_fd=6, key=0x7ffee1fee8e0, value=0x7ffee1fee8df, flags=BPF_ANY}, 128) = 0 bpf(BPF_MAP_UPDATE_ELEM, {map_fd=6, key=0x7ffee1fee8e0, value=0x7ffee1fee8df, flags=BPF_ANY}, 128) = 0 bpf(BPF_MAP_UPDATE_ELEM, {map_fd=6, key=0x7ffee1fee8e0, value=0x7ffee1fee8df, flags=BPF_ANY}, 128) = 0 bpf(BPF_MAP_UPDATE_ELEM, {map_fd=6, key=0x7ffee1fee8e0, value=0x7ffee1fee8df, flags=BPF_ANY}, 128) = 0 bpf(BPF_MAP_UPDATE_ELEM, {map_fd=6, key=0x7ffee1fee8e0, value=0x7ffee1fee8df, flags=BPF_ANY}, 128) = 0 bpf(BPF_MAP_UPDATE_ELEM, {map_fd=6, key=0x7ffee1fee8e0, value=0x7ffee1fee8df, flags=BPF_ANY}, 128) = 0 bpf(BPF_MAP_UPDATE_ELEM, {map_fd=6, key=0x7ffee1fee8e0, value=0x7ffee1fee8df, flags=BPF_ANY}, 128) = 0 bpf(BPF_MAP_UPDATE_ELEM, {map_fd=6, key=0x7ffee1fee8e0, value=0x7ffee1fee8df, flags=BPF_ANY}, 128) = 0 bpf(BPF_MAP_UPDATE_ELEM, {map_fd=6, key=0x7ffee1fee8e0, value=0x7ffee1fee8df, flags=BPF_ANY}, 128) = 0 bpf(BPF_MAP_UPDATE_ELEM, {map_fd=6, key=0x7ffee1fee8e0, value=0x7ffee1fee8df, flags=BPF_ANY}, 128) = 0 bpf(BPF_PROG_LOAD, {prog_type=BPF_PROG_TYPE_TRACEPOINT, insn_cnt=2, insns=0x7ffee1fee3c0, license="GPL", log_level=0, log_size=0, log_buf=NULL, kern_version=KERNEL_VERSION(0, 0, 0), prog_flags=0, prog_name="", prog_ifindex=0, expected_attach_type=BPF_CGROUP_INET_INGRESS, prog_btf_fd=0, func_info_rec_size=0, func_info=NULL, func_info_cnt=0, line_info_rec_size=0, line_info=NULL, line_info_cnt=0, attach_btf_id=0, attach_prog_fd=0}, 128) = 12 bpf(BPF_LINK_CREATE, {link_create={prog_fd=12, target_fd=-1, attach_type=0x29 /* BPF_??? */, flags=0}}, 128) = -1 EINVAL (Invalid argument) ^Cstrace: Process 1702285 detached # DURATION | COUNT | GRAPH | 0 - 1 us | 109 | ################# | 1 - 2 us | 127 | ################### | 2 - 4 us | 36 | ##### | 4 - 8 us | 20 | ### | 8 - 16 us | 2 | | 16 - 32 us | 0 | | 32 - 64 us | 0 | | 64 - 128 us | 0 | | 128 - 256 us | 0 | | 256 - 512 us | 0 | | 512 - 1024 us | 0 | | 1 - 2 ms | 0 | | 2 - 4 ms | 0 | | 4 - 8 ms | 0 | | 8 - 16 ms | 0 | | 16 - 32 ms | 0 | | 32 - 64 ms | 0 | | 64 - 128 ms | 0 | | 128 - 256 ms | 0 | | 256 - 512 ms | 0 | | 512 - 1024 ms | 0 | | 1 - ... s | 0 | | # Signed-off-by: Namhyung Kim Tested-by: Arnaldo Carvalho de Melo Cc: Andi Kleen Cc: Athira Jajeev Cc: Changbin Du Cc: Ian Rogers Cc: Ingo Molnar Cc: Jiri Olsa Cc: Peter Zijlstra Cc: Song Liu Cc: Stephane Eranian Link: https://lore.kernel.org/r/20211215185154.360314-6-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/bpf_ftrace.c | 42 ++++++++++++++++++++- tools/perf/util/bpf_skel/func_latency.bpf.c | 21 +++++++++++ 2 files changed, 62 insertions(+), 1 deletion(-) diff --git a/tools/perf/util/bpf_ftrace.c b/tools/perf/util/bpf_ftrace.c index ec4e2f5a2fc4..f00a2de6778c 100644 --- a/tools/perf/util/bpf_ftrace.c +++ b/tools/perf/util/bpf_ftrace.c @@ -7,7 +7,9 @@ #include "util/ftrace.h" #include "util/cpumap.h" +#include "util/thread_map.h" #include "util/debug.h" +#include "util/evlist.h" #include "util/bpf_counter.h" #include "util/bpf_skel/func_latency.skel.h" @@ -16,7 +18,8 @@ static struct func_latency_bpf *skel; int perf_ftrace__latency_prepare_bpf(struct perf_ftrace *ftrace) { - int err; + int fd, err; + int i, ncpus = 1, ntasks = 1; struct filter_entry *func; if (!list_is_singular(&ftrace->filters)) { @@ -33,6 +36,17 @@ int perf_ftrace__latency_prepare_bpf(struct perf_ftrace *ftrace) return -1; } + /* don't need to set cpu filter for system-wide mode */ + if (ftrace->target.cpu_list) { + ncpus = perf_cpu_map__nr(ftrace->evlist->core.cpus); + bpf_map__set_max_entries(skel->maps.cpu_filter, ncpus); + } + + if (target__has_task(&ftrace->target) || target__none(&ftrace->target)) { + ntasks = perf_thread_map__nr(ftrace->evlist->core.threads); + bpf_map__set_max_entries(skel->maps.task_filter, ntasks); + } + set_max_rlimit(); err = func_latency_bpf__load(skel); @@ -41,6 +55,32 @@ int perf_ftrace__latency_prepare_bpf(struct perf_ftrace *ftrace) goto out; } + if (ftrace->target.cpu_list) { + u32 cpu; + u8 val = 1; + + skel->bss->has_cpu = 1; + fd = bpf_map__fd(skel->maps.cpu_filter); + + for (i = 0; i < ncpus; i++) { + cpu = cpu_map__cpu(ftrace->evlist->core.cpus, i); + bpf_map_update_elem(fd, &cpu, &val, BPF_ANY); + } + } + + if (target__has_task(&ftrace->target) || target__none(&ftrace->target)) { + u32 pid; + u8 val = 1; + + skel->bss->has_task = 1; + fd = bpf_map__fd(skel->maps.task_filter); + + for (i = 0; i < ntasks; i++) { + pid = perf_thread_map__pid(ftrace->evlist->core.threads, i); + bpf_map_update_elem(fd, &pid, &val, BPF_ANY); + } + } + skel->links.func_begin = bpf_program__attach_kprobe(skel->progs.func_begin, false, func->name); if (IS_ERR(skel->links.func_begin)) { diff --git a/tools/perf/util/bpf_skel/func_latency.bpf.c b/tools/perf/util/bpf_skel/func_latency.bpf.c index ccd96b09fc42..ea94187fe443 100644 --- a/tools/perf/util/bpf_skel/func_latency.bpf.c +++ b/tools/perf/util/bpf_skel/func_latency.bpf.c @@ -37,6 +37,8 @@ struct { int enabled = 0; +int has_cpu = 0; +int has_task = 0; SEC("kprobe/func") int BPF_PROG(func_begin) @@ -47,6 +49,25 @@ int BPF_PROG(func_begin) return 0; key = bpf_get_current_pid_tgid(); + + if (has_cpu) { + __u32 cpu = bpf_get_smp_processor_id(); + __u8 *ok; + + ok = bpf_map_lookup_elem(&cpu_filter, &cpu); + if (!ok) + return 0; + } + + if (has_task) { + __u32 pid = key & 0xffffffff; + __u8 *ok; + + ok = bpf_map_lookup_elem(&task_filter, &pid); + if (!ok) + return 0; + } + now = bpf_ktime_get_ns(); // overwrite timestamp for nested functions From a840974e96fd51b47c79301522bccf23cc8bb388 Mon Sep 17 00:00:00 2001 From: Thomas Richter Date: Thu, 16 Dec 2021 16:14:54 +0100 Subject: [PATCH 083/493] perf test: Test 73 Sig_trap fails on s390 In Linux next commit 5504f67944484495 ("perf test sigtrap: Add basic stress test for sigtrap handling") introduced the new test which uses breakpoint events. These events are not supported on s390 and PowerPC and always fail: # perf test -F 73 73: Sigtrap : FAILED! # Fix it the same way as in the breakpoint tests in file tests/bp_account.c where these type of tests are skipped on s390 and PowerPC platforms. With this patch skip this test on both platforms. Output after: # perf test -F 73 73: Sigtrap # Fixes: 5504f67944484495 ("perf test sigtrap: Add basic stress test for sigtrap handling") Signed-off-by: Thomas Richter Acked-by: Marco Elver Cc: Heiko Carstens Cc: Sumanth Korikkar Cc: Sven Schnelle Cc: Vasily Gorbik Link: https://lore.kernel.org/r/20211216151454.752066-1-tmricht@linux.ibm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/sigtrap.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/tools/perf/tests/sigtrap.c b/tools/perf/tests/sigtrap.c index 1004bf0e7cc9..1f147fe6595f 100644 --- a/tools/perf/tests/sigtrap.c +++ b/tools/perf/tests/sigtrap.c @@ -22,6 +22,19 @@ #include "tests.h" #include "../perf-sys.h" +/* + * PowerPC and S390 do not support creation of instruction breakpoints using the + * perf_event interface. + * + * Just disable the test for these architectures until these issues are + * resolved. + */ +#if defined(__powerpc__) || defined(__s390x__) +#define BP_ACCOUNT_IS_SUPPORTED 0 +#else +#define BP_ACCOUNT_IS_SUPPORTED 1 +#endif + #define NUM_THREADS 5 static struct { @@ -122,6 +135,11 @@ static int test__sigtrap(struct test_suite *test __maybe_unused, int subtest __m char sbuf[STRERR_BUFSIZE]; int i, fd, ret = TEST_FAIL; + if (!BP_ACCOUNT_IS_SUPPORTED) { + pr_debug("Test not supported on this architecture"); + return TEST_SKIP; + } + pthread_barrier_init(&barrier, NULL, NUM_THREADS + 1); action.sa_flags = SA_SIGINFO | SA_NODEFER; From f17e53388e82ebefc78ff53e33a6d8eebc1ad337 Mon Sep 17 00:00:00 2001 From: Lars-Peter Clausen Date: Wed, 8 Dec 2021 12:42:12 +0100 Subject: [PATCH 084/493] dmaengine: xilinx: Handle IRQ mapping errors Handle errors when trying to map the IRQ for the DMA channels. The main motivation here is to be able to handle probe deferral. E.g. when using DT overlays it is possible that the DMA controller is probed before interrupt controller, depending on the order in the DT. In order to support this switch from irq_of_parse_and_map() to of_irq_get(), which internally does the same, but it will return EPROBE_DEFER when the interrupt controller is not yet available. As a result other errors, such as an invalid IRQ specification, or missing IRQ are also properly handled. Signed-off-by: Lars-Peter Clausen Reviewed-by: Radhey Shyam Pandey Link: https://lore.kernel.org/r/20211208114212.234130-1-lars@metafoo.de Signed-off-by: Vinod Koul --- drivers/dma/xilinx/xilinx_dma.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/drivers/dma/xilinx/xilinx_dma.c b/drivers/dma/xilinx/xilinx_dma.c index 61618148f9d4..cd62bbb50e8b 100644 --- a/drivers/dma/xilinx/xilinx_dma.c +++ b/drivers/dma/xilinx/xilinx_dma.c @@ -2980,7 +2980,9 @@ static int xilinx_dma_chan_probe(struct xilinx_dma_device *xdev, } /* Request the interrupt */ - chan->irq = irq_of_parse_and_map(node, chan->tdest); + chan->irq = of_irq_get(node, chan->tdest); + if (chan->irq < 0) + return dev_err_probe(xdev->dev, chan->irq, "failed to get irq\n"); err = request_irq(chan->irq, xdev->dma_config->irq_handler, IRQF_SHARED, "xilinx-dma-controller", chan); if (err) { @@ -3054,8 +3056,11 @@ static int xilinx_dma_child_probe(struct xilinx_dma_device *xdev, if (xdev->dma_config->dmatype == XDMA_TYPE_AXIMCDMA && ret < 0) dev_warn(xdev->dev, "missing dma-channels property\n"); - for (i = 0; i < nr_channels; i++) - xilinx_dma_chan_probe(xdev, node); + for (i = 0; i < nr_channels; i++) { + ret = xilinx_dma_chan_probe(xdev, node); + if (ret) + return ret; + } return 0; } From aa8ff35e10030c12df9f1b4a364f540f973c620f Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Tue, 7 Dec 2021 18:10:13 -0600 Subject: [PATCH 085/493] dmaengine: at_xdmac: Use struct_size() in devm_kzalloc() Make use of the struct_size() helper instead of an open-coded version, in order to avoid any potential type mistakes or integer overflows that, in the worst scenario, could lead to heap overflows. Link: https://github.com/KSPP/linux/issues/160 Signed-off-by: Gustavo A. R. Silva Reviewed-by: Kees Cook Link: https://lore.kernel.org/r/20211208001013.GA62330@embeddedor Signed-off-by: Vinod Koul --- drivers/dma/at_xdmac.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/dma/at_xdmac.c b/drivers/dma/at_xdmac.c index 275a76f188ae..e42dede5b243 100644 --- a/drivers/dma/at_xdmac.c +++ b/drivers/dma/at_xdmac.c @@ -2031,7 +2031,7 @@ static int __maybe_unused atmel_xdmac_resume(struct device *dev) static int at_xdmac_probe(struct platform_device *pdev) { struct at_xdmac *atxdmac; - int irq, size, nr_channels, i, ret; + int irq, nr_channels, i, ret; void __iomem *base; u32 reg; @@ -2056,9 +2056,9 @@ static int at_xdmac_probe(struct platform_device *pdev) return -EINVAL; } - size = sizeof(*atxdmac); - size += nr_channels * sizeof(struct at_xdmac_chan); - atxdmac = devm_kzalloc(&pdev->dev, size, GFP_KERNEL); + atxdmac = devm_kzalloc(&pdev->dev, + struct_size(atxdmac, chan, nr_channels), + GFP_KERNEL); if (!atxdmac) { dev_err(&pdev->dev, "can't allocate at_xdmac structure\n"); return -ENOMEM; From 839c2e2371dba08fc3a8aea5d5e6525dd19cab66 Mon Sep 17 00:00:00 2001 From: Aswath Govindraju Date: Fri, 19 Nov 2021 18:53:13 +0530 Subject: [PATCH 086/493] dmaengine: ti: k3-udma: Add SoC dependent data for J721S2 SoC Add SYSFW defined rchan_oes_offset number for J721S2 SoC in soc data. Signed-off-by: Aswath Govindraju Acked-by: Peter Ujfalusi Link: https://lore.kernel.org/r/20211119132315.15901-2-a-govindraju@ti.com Signed-off-by: Vinod Koul --- drivers/dma/ti/k3-udma.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/dma/ti/k3-udma.c b/drivers/dma/ti/k3-udma.c index 041d8e32d630..895dcd0e8b60 100644 --- a/drivers/dma/ti/k3-udma.c +++ b/drivers/dma/ti/k3-udma.c @@ -4376,6 +4376,7 @@ static const struct soc_device_attribute k3_soc_devices[] = { { .family = "J721E", .data = &j721e_soc_data }, { .family = "J7200", .data = &j7200_soc_data }, { .family = "AM64X", .data = &am64_soc_data }, + { .family = "J721S2", .data = &j721e_soc_data}, { /* sentinel */ } }; From 78b2f63cd0cc115ea10711b59734c7430deb3fe3 Mon Sep 17 00:00:00 2001 From: Aswath Govindraju Date: Fri, 19 Nov 2021 18:53:14 +0530 Subject: [PATCH 087/493] drivers: dma: ti: k3-psil: Add support for J721S2 Add support for J721S2 SOC. Signed-off-by: Aswath Govindraju Acked-by: Peter Ujfalusi Link: https://lore.kernel.org/r/20211119132315.15901-3-a-govindraju@ti.com Signed-off-by: Vinod Koul --- drivers/dma/ti/Makefile | 3 +- drivers/dma/ti/k3-psil-j721s2.c | 167 ++++++++++++++++++++++++++++++++ drivers/dma/ti/k3-psil-priv.h | 1 + drivers/dma/ti/k3-psil.c | 1 + 4 files changed, 171 insertions(+), 1 deletion(-) create mode 100644 drivers/dma/ti/k3-psil-j721s2.c diff --git a/drivers/dma/ti/Makefile b/drivers/dma/ti/Makefile index bd496efadff7..1d4081a049b7 100644 --- a/drivers/dma/ti/Makefile +++ b/drivers/dma/ti/Makefile @@ -8,5 +8,6 @@ obj-$(CONFIG_TI_K3_PSIL) += k3-psil.o \ k3-psil-am654.o \ k3-psil-j721e.o \ k3-psil-j7200.o \ - k3-psil-am64.o + k3-psil-am64.o \ + k3-psil-j721s2.o obj-$(CONFIG_TI_DMA_CROSSBAR) += dma-crossbar.o diff --git a/drivers/dma/ti/k3-psil-j721s2.c b/drivers/dma/ti/k3-psil-j721s2.c new file mode 100644 index 000000000000..4c4172a4d271 --- /dev/null +++ b/drivers/dma/ti/k3-psil-j721s2.c @@ -0,0 +1,167 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2021 Texas Instruments Incorporated - https://www.ti.com + */ + +#include + +#include "k3-psil-priv.h" + +#define PSIL_PDMA_XY_TR(x) \ + { \ + .thread_id = x, \ + .ep_config = { \ + .ep_type = PSIL_EP_PDMA_XY, \ + }, \ + } + +#define PSIL_PDMA_XY_PKT(x) \ + { \ + .thread_id = x, \ + .ep_config = { \ + .ep_type = PSIL_EP_PDMA_XY, \ + .pkt_mode = 1, \ + }, \ + } + +#define PSIL_PDMA_MCASP(x) \ + { \ + .thread_id = x, \ + .ep_config = { \ + .ep_type = PSIL_EP_PDMA_XY, \ + .pdma_acc32 = 1, \ + .pdma_burst = 1, \ + }, \ + } + +#define PSIL_ETHERNET(x) \ + { \ + .thread_id = x, \ + .ep_config = { \ + .ep_type = PSIL_EP_NATIVE, \ + .pkt_mode = 1, \ + .needs_epib = 1, \ + .psd_size = 16, \ + }, \ + } + +#define PSIL_SA2UL(x, tx) \ + { \ + .thread_id = x, \ + .ep_config = { \ + .ep_type = PSIL_EP_NATIVE, \ + .pkt_mode = 1, \ + .needs_epib = 1, \ + .psd_size = 64, \ + .notdpkt = tx, \ + }, \ + } + +/* PSI-L source thread IDs, used for RX (DMA_DEV_TO_MEM) */ +static struct psil_ep j721s2_src_ep_map[] = { + /* PDMA_MCASP - McASP0-4 */ + PSIL_PDMA_MCASP(0x4400), + PSIL_PDMA_MCASP(0x4401), + PSIL_PDMA_MCASP(0x4402), + PSIL_PDMA_MCASP(0x4403), + PSIL_PDMA_MCASP(0x4404), + /* PDMA_SPI_G0 - SPI0-3 */ + PSIL_PDMA_XY_PKT(0x4600), + PSIL_PDMA_XY_PKT(0x4601), + PSIL_PDMA_XY_PKT(0x4602), + PSIL_PDMA_XY_PKT(0x4603), + PSIL_PDMA_XY_PKT(0x4604), + PSIL_PDMA_XY_PKT(0x4605), + PSIL_PDMA_XY_PKT(0x4606), + PSIL_PDMA_XY_PKT(0x4607), + PSIL_PDMA_XY_PKT(0x4608), + PSIL_PDMA_XY_PKT(0x4609), + PSIL_PDMA_XY_PKT(0x460a), + PSIL_PDMA_XY_PKT(0x460b), + PSIL_PDMA_XY_PKT(0x460c), + PSIL_PDMA_XY_PKT(0x460d), + PSIL_PDMA_XY_PKT(0x460e), + PSIL_PDMA_XY_PKT(0x460f), + /* PDMA_SPI_G1 - SPI4-7 */ + PSIL_PDMA_XY_PKT(0x4610), + PSIL_PDMA_XY_PKT(0x4611), + PSIL_PDMA_XY_PKT(0x4612), + PSIL_PDMA_XY_PKT(0x4613), + PSIL_PDMA_XY_PKT(0x4614), + PSIL_PDMA_XY_PKT(0x4615), + PSIL_PDMA_XY_PKT(0x4616), + PSIL_PDMA_XY_PKT(0x4617), + PSIL_PDMA_XY_PKT(0x4618), + PSIL_PDMA_XY_PKT(0x4619), + PSIL_PDMA_XY_PKT(0x461a), + PSIL_PDMA_XY_PKT(0x461b), + PSIL_PDMA_XY_PKT(0x461c), + PSIL_PDMA_XY_PKT(0x461d), + PSIL_PDMA_XY_PKT(0x461e), + PSIL_PDMA_XY_PKT(0x461f), + /* PDMA_USART_G0 - UART0-1 */ + PSIL_PDMA_XY_PKT(0x4700), + PSIL_PDMA_XY_PKT(0x4701), + /* PDMA_USART_G1 - UART2-3 */ + PSIL_PDMA_XY_PKT(0x4702), + PSIL_PDMA_XY_PKT(0x4703), + /* PDMA_USART_G2 - UART4-9 */ + PSIL_PDMA_XY_PKT(0x4704), + PSIL_PDMA_XY_PKT(0x4705), + PSIL_PDMA_XY_PKT(0x4706), + PSIL_PDMA_XY_PKT(0x4707), + PSIL_PDMA_XY_PKT(0x4708), + PSIL_PDMA_XY_PKT(0x4709), + /* CPSW0 */ + PSIL_ETHERNET(0x7000), + /* MCU_PDMA0 (MCU_PDMA_MISC_G0) - SPI0 */ + PSIL_PDMA_XY_PKT(0x7100), + PSIL_PDMA_XY_PKT(0x7101), + PSIL_PDMA_XY_PKT(0x7102), + PSIL_PDMA_XY_PKT(0x7103), + /* MCU_PDMA1 (MCU_PDMA_MISC_G1) - SPI1-2 */ + PSIL_PDMA_XY_PKT(0x7200), + PSIL_PDMA_XY_PKT(0x7201), + PSIL_PDMA_XY_PKT(0x7202), + PSIL_PDMA_XY_PKT(0x7203), + PSIL_PDMA_XY_PKT(0x7204), + PSIL_PDMA_XY_PKT(0x7205), + PSIL_PDMA_XY_PKT(0x7206), + PSIL_PDMA_XY_PKT(0x7207), + /* MCU_PDMA2 (MCU_PDMA_MISC_G2) - UART0 */ + PSIL_PDMA_XY_PKT(0x7300), + /* MCU_PDMA_ADC - ADC0-1 */ + PSIL_PDMA_XY_TR(0x7400), + PSIL_PDMA_XY_TR(0x7401), + PSIL_PDMA_XY_TR(0x7402), + PSIL_PDMA_XY_TR(0x7403), + /* SA2UL */ + PSIL_SA2UL(0x7500, 0), + PSIL_SA2UL(0x7501, 0), + PSIL_SA2UL(0x7502, 0), + PSIL_SA2UL(0x7503, 0), +}; + +/* PSI-L destination thread IDs, used for TX (DMA_MEM_TO_DEV) */ +static struct psil_ep j721s2_dst_ep_map[] = { + /* CPSW0 */ + PSIL_ETHERNET(0xf000), + PSIL_ETHERNET(0xf001), + PSIL_ETHERNET(0xf002), + PSIL_ETHERNET(0xf003), + PSIL_ETHERNET(0xf004), + PSIL_ETHERNET(0xf005), + PSIL_ETHERNET(0xf006), + PSIL_ETHERNET(0xf007), + /* SA2UL */ + PSIL_SA2UL(0xf500, 1), + PSIL_SA2UL(0xf501, 1), +}; + +struct psil_ep_map j721s2_ep_map = { + .name = "j721s2", + .src = j721s2_src_ep_map, + .src_count = ARRAY_SIZE(j721s2_src_ep_map), + .dst = j721s2_dst_ep_map, + .dst_count = ARRAY_SIZE(j721s2_dst_ep_map), +}; diff --git a/drivers/dma/ti/k3-psil-priv.h b/drivers/dma/ti/k3-psil-priv.h index b74e192e3c2d..e51e179cdb56 100644 --- a/drivers/dma/ti/k3-psil-priv.h +++ b/drivers/dma/ti/k3-psil-priv.h @@ -41,5 +41,6 @@ extern struct psil_ep_map am654_ep_map; extern struct psil_ep_map j721e_ep_map; extern struct psil_ep_map j7200_ep_map; extern struct psil_ep_map am64_ep_map; +extern struct psil_ep_map j721s2_ep_map; #endif /* K3_PSIL_PRIV_H_ */ diff --git a/drivers/dma/ti/k3-psil.c b/drivers/dma/ti/k3-psil.c index 13ce7367d870..8867b4bd0c51 100644 --- a/drivers/dma/ti/k3-psil.c +++ b/drivers/dma/ti/k3-psil.c @@ -21,6 +21,7 @@ static const struct soc_device_attribute k3_soc_devices[] = { { .family = "J721E", .data = &j721e_ep_map }, { .family = "J7200", .data = &j7200_ep_map }, { .family = "AM64X", .data = &am64_ep_map }, + { .family = "J721S2", .data = &j721s2_ep_map }, { /* sentinel */ } }; From a173a2428752b43f50dc2cd500437baff6a62376 Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Mon, 6 Dec 2021 11:42:31 -0600 Subject: [PATCH 088/493] dt-bindings: dma: pl08x: Fix unevaluatedProperties warnings With 'unevaluatedProperties' support implemented, the example has warnings on primecell properties and 'resets': Documentation/devicetree/bindings/dma/arm-pl08x.example.dt.yaml: dma-controller@67000000: Unevaluated properties are not allowed ('arm,primecell-periphid', 'resets' were unexpected) Add the missing reference to primecell.yaml and definition for 'resets'. Cc: Vinod Koul Cc: dmaengine@vger.kernel.org Signed-off-by: Rob Herring Reviewed-by: Thierry Reding Link: https://lore.kernel.org/r/20211206174231.2298349-1-robh@kernel.org Signed-off-by: Vinod Koul --- Documentation/devicetree/bindings/dma/arm-pl08x.yaml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/Documentation/devicetree/bindings/dma/arm-pl08x.yaml b/Documentation/devicetree/bindings/dma/arm-pl08x.yaml index 3bd9eea543ca..9193b18fb75f 100644 --- a/Documentation/devicetree/bindings/dma/arm-pl08x.yaml +++ b/Documentation/devicetree/bindings/dma/arm-pl08x.yaml @@ -10,6 +10,7 @@ maintainers: - Vinod Koul allOf: + - $ref: /schemas/arm/primecell.yaml# - $ref: "dma-controller.yaml#" # We need a select here so we don't match all nodes with 'arm,primecell' @@ -89,6 +90,9 @@ properties: - 64 description: bus width used for memcpy in bits. FTDMAC020 also accept 64 bits + resets: + maxItems: 1 + required: - reg - interrupts From 5f1e024c9d07d09988757761107ba4bb1ae7d408 Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Mon, 6 Dec 2021 11:42:26 -0600 Subject: [PATCH 089/493] dt-bindings: dma: ti: Add missing ti,k3-sci-common.yaml reference The TI k3-bcdma and k3-pktdma both use 'ti,sci' and 'ti,sci-dev-id' properties defined in ti,k3-sci-common.yaml. When 'unevaluatedProperties' support is enabled, the follow warning is generated: Documentation/devicetree/bindings/dma/ti/k3-bcdma.example.dt.yaml: dma-controller@485c0100: Unevaluated properties are not allowed ('ti,sci', 'ti,sci-dev-id' were unexpected) Documentation/devicetree/bindings/dma/ti/k3-pktdma.example.dt.yaml: dma-controller@485c0000: Unevaluated properties are not allowed ('ti,sci', 'ti,sci-dev-id' were unexpected) Add a reference to ti,k3-sci-common.yaml to fix this. Cc: Peter Ujfalusi Cc: Vinod Koul Cc: dmaengine@vger.kernel.org Signed-off-by: Rob Herring Reviewed-by: Thierry Reding Link: https://lore.kernel.org/r/20211206174226.2298135-1-robh@kernel.org Signed-off-by: Vinod Koul --- Documentation/devicetree/bindings/dma/ti/k3-bcdma.yaml | 1 + Documentation/devicetree/bindings/dma/ti/k3-pktdma.yaml | 1 + 2 files changed, 2 insertions(+) diff --git a/Documentation/devicetree/bindings/dma/ti/k3-bcdma.yaml b/Documentation/devicetree/bindings/dma/ti/k3-bcdma.yaml index df29d59d13a8..08627d91e607 100644 --- a/Documentation/devicetree/bindings/dma/ti/k3-bcdma.yaml +++ b/Documentation/devicetree/bindings/dma/ti/k3-bcdma.yaml @@ -30,6 +30,7 @@ description: | allOf: - $ref: /schemas/dma/dma-controller.yaml# + - $ref: /schemas/arm/keystone/ti,k3-sci-common.yaml# properties: compatible: diff --git a/Documentation/devicetree/bindings/dma/ti/k3-pktdma.yaml b/Documentation/devicetree/bindings/dma/ti/k3-pktdma.yaml index ea19d12a9337..507d16d84ade 100644 --- a/Documentation/devicetree/bindings/dma/ti/k3-pktdma.yaml +++ b/Documentation/devicetree/bindings/dma/ti/k3-pktdma.yaml @@ -25,6 +25,7 @@ description: | allOf: - $ref: /schemas/dma/dma-controller.yaml# + - $ref: /schemas/arm/keystone/ti,k3-sci-common.yaml# properties: compatible: From e0699a75955dabac1e2edcf67d74b9998fe9d42c Mon Sep 17 00:00:00 2001 From: Paul Cercueil Date: Mon, 6 Dec 2021 17:42:54 +0000 Subject: [PATCH 090/493] dt-bindings: dma: ingenic: Add compatible strings for MDMA and BDMA The JZ4760 and JZ4760B SoCs have two additional DMA controllers: the MDMA, which only supports memcpy operations, and the BDMA which is mostly used for transfer between memories and the BCH controller. The JZ4770 also features the same BDMA as in the JZ4760B, but does not seem to have a MDMA. Signed-off-by: Paul Cercueil Reviewed-by: Rob Herring Link: https://lore.kernel.org/r/20211206174259.68133-2-paul@crapouillou.net Signed-off-by: Vinod Koul --- .../devicetree/bindings/dma/ingenic,dma.yaml | 26 ++++++++++++------- 1 file changed, 17 insertions(+), 9 deletions(-) diff --git a/Documentation/devicetree/bindings/dma/ingenic,dma.yaml b/Documentation/devicetree/bindings/dma/ingenic,dma.yaml index dc059d6fd037..2607b403277e 100644 --- a/Documentation/devicetree/bindings/dma/ingenic,dma.yaml +++ b/Documentation/devicetree/bindings/dma/ingenic,dma.yaml @@ -14,15 +14,23 @@ allOf: properties: compatible: - enum: - - ingenic,jz4740-dma - - ingenic,jz4725b-dma - - ingenic,jz4760-dma - - ingenic,jz4760b-dma - - ingenic,jz4770-dma - - ingenic,jz4780-dma - - ingenic,x1000-dma - - ingenic,x1830-dma + oneOf: + - enum: + - ingenic,jz4740-dma + - ingenic,jz4725b-dma + - ingenic,jz4760-dma + - ingenic,jz4760-bdma + - ingenic,jz4760-mdma + - ingenic,jz4760b-dma + - ingenic,jz4760b-bdma + - ingenic,jz4760b-mdma + - ingenic,jz4770-dma + - ingenic,jz4780-dma + - ingenic,x1000-dma + - ingenic,x1830-dma + - items: + - const: ingenic,jz4770-bdma + - const: ingenic,jz4760b-bdma reg: items: From dafa79a10ed70683811295cb68deca2d30c22ef4 Mon Sep 17 00:00:00 2001 From: Paul Cercueil Date: Mon, 6 Dec 2021 17:42:55 +0000 Subject: [PATCH 091/493] dt-bindings: dma: ingenic: Support #dma-cells = <3> Extend the binding to support specifying a different request type for each direction. Signed-off-by: Paul Cercueil Link: https://lore.kernel.org/r/20211206174259.68133-3-paul@crapouillou.net Signed-off-by: Vinod Koul --- .../devicetree/bindings/dma/ingenic,dma.yaml | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/Documentation/devicetree/bindings/dma/ingenic,dma.yaml b/Documentation/devicetree/bindings/dma/ingenic,dma.yaml index 2607b403277e..3b0b3b919af8 100644 --- a/Documentation/devicetree/bindings/dma/ingenic,dma.yaml +++ b/Documentation/devicetree/bindings/dma/ingenic,dma.yaml @@ -44,13 +44,19 @@ properties: maxItems: 1 "#dma-cells": - const: 2 + enum: [2, 3] description: > DMA clients must use the format described in dma.txt, giving a phandle - to the DMA controller plus the following 2 integer cells: + to the DMA controller plus the following integer cells: - - Request type: The DMA request type for transfers to/from the - device on the allocated channel, as defined in the SoC documentation. + - Request type: The DMA request type specifies the device endpoint that + will be the source or destination of the DMA transfer. + If "#dma-cells" is 2, the request type is a single cell, and the + direction will be unidirectional (either RX or TX but not both). + If "#dma-cells" is 3, the request type has two cells; the first + one corresponds to the host to device direction (TX), the second one + corresponds to the device to host direction (RX). The DMA channel is + then bidirectional. - Channel: If set to 0xffffffff, any available channel will be allocated for the client. Otherwise, the exact channel specified will be used. From b72cbb1ab2aff3ceef8a2703052d06dc216b01f0 Mon Sep 17 00:00:00 2001 From: Paul Cercueil Date: Mon, 6 Dec 2021 17:42:56 +0000 Subject: [PATCH 092/493] dmaengine: jz4780: Work around hardware bug on JZ4760 SoCs The JZ4760 SoC has a hardware problem with chan0 not enabling properly if it's enabled before chan1, after a reset (works fine afterwards). This is worked around in the probe function by just enabling then disabling chan1. Signed-off-by: Paul Cercueil Link: https://lore.kernel.org/r/20211206174259.68133-4-paul@crapouillou.net Signed-off-by: Vinod Koul --- drivers/dma/dma-jz4780.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/dma/dma-jz4780.c b/drivers/dma/dma-jz4780.c index 96701dedcac8..d63753a56541 100644 --- a/drivers/dma/dma-jz4780.c +++ b/drivers/dma/dma-jz4780.c @@ -938,6 +938,14 @@ static int jz4780_dma_probe(struct platform_device *pdev) jzchan->vchan.desc_free = jz4780_dma_desc_free; } + /* + * On JZ4760, chan0 won't enable properly the first time. + * Enabling then disabling chan1 will magically make chan0 work + * correctly. + */ + jz4780_dma_chan_enable(jzdma, 1); + jz4780_dma_chan_disable(jzdma, 1); + ret = platform_get_irq(pdev, 0); if (ret < 0) goto err_disable_clk; From 3d70fccf74feba4125542663ff49b4d42d3dcbe7 Mon Sep 17 00:00:00 2001 From: Paul Cercueil Date: Mon, 6 Dec 2021 17:42:57 +0000 Subject: [PATCH 093/493] dmaengine: jz4780: Add support for the MDMA and BDMA in the JZ4760(B) The JZ4760 and JZ4760B SoCs have two regular DMA controllers with 6 channels each. They also have an extra DMA controller named MDMA with only 2 channels, that only supports memcpy operations, and one named BDMA with only 3 channels, that is mostly used for transfers between memories and the BCH controller. Signed-off-by: Paul Cercueil Link: https://lore.kernel.org/r/20211206174259.68133-5-paul@crapouillou.net Signed-off-by: Vinod Koul --- drivers/dma/dma-jz4780.c | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/drivers/dma/dma-jz4780.c b/drivers/dma/dma-jz4780.c index d63753a56541..bcd21d7a559d 100644 --- a/drivers/dma/dma-jz4780.c +++ b/drivers/dma/dma-jz4780.c @@ -1019,12 +1019,36 @@ static const struct jz4780_dma_soc_data jz4760_dma_soc_data = { .flags = JZ_SOC_DATA_PER_CHAN_PM | JZ_SOC_DATA_NO_DCKES_DCKEC, }; +static const struct jz4780_dma_soc_data jz4760_mdma_soc_data = { + .nb_channels = 2, + .transfer_ord_max = 6, + .flags = JZ_SOC_DATA_PER_CHAN_PM | JZ_SOC_DATA_NO_DCKES_DCKEC, +}; + +static const struct jz4780_dma_soc_data jz4760_bdma_soc_data = { + .nb_channels = 3, + .transfer_ord_max = 6, + .flags = JZ_SOC_DATA_PER_CHAN_PM | JZ_SOC_DATA_NO_DCKES_DCKEC, +}; + static const struct jz4780_dma_soc_data jz4760b_dma_soc_data = { .nb_channels = 5, .transfer_ord_max = 6, .flags = JZ_SOC_DATA_PER_CHAN_PM, }; +static const struct jz4780_dma_soc_data jz4760b_mdma_soc_data = { + .nb_channels = 2, + .transfer_ord_max = 6, + .flags = JZ_SOC_DATA_PER_CHAN_PM, +}; + +static const struct jz4780_dma_soc_data jz4760b_bdma_soc_data = { + .nb_channels = 3, + .transfer_ord_max = 6, + .flags = JZ_SOC_DATA_PER_CHAN_PM, +}; + static const struct jz4780_dma_soc_data jz4770_dma_soc_data = { .nb_channels = 6, .transfer_ord_max = 6, @@ -1053,7 +1077,11 @@ static const struct of_device_id jz4780_dma_dt_match[] = { { .compatible = "ingenic,jz4740-dma", .data = &jz4740_dma_soc_data }, { .compatible = "ingenic,jz4725b-dma", .data = &jz4725b_dma_soc_data }, { .compatible = "ingenic,jz4760-dma", .data = &jz4760_dma_soc_data }, + { .compatible = "ingenic,jz4760-mdma", .data = &jz4760_mdma_soc_data }, + { .compatible = "ingenic,jz4760-bdma", .data = &jz4760_bdma_soc_data }, { .compatible = "ingenic,jz4760b-dma", .data = &jz4760b_dma_soc_data }, + { .compatible = "ingenic,jz4760b-mdma", .data = &jz4760b_mdma_soc_data }, + { .compatible = "ingenic,jz4760b-bdma", .data = &jz4760b_bdma_soc_data }, { .compatible = "ingenic,jz4770-dma", .data = &jz4770_dma_soc_data }, { .compatible = "ingenic,jz4780-dma", .data = &jz4780_dma_soc_data }, { .compatible = "ingenic,x1000-dma", .data = &x1000_dma_soc_data }, From c8c0cda827b90aad250360c657b30b2bcdf82503 Mon Sep 17 00:00:00 2001 From: Paul Cercueil Date: Mon, 6 Dec 2021 17:42:58 +0000 Subject: [PATCH 094/493] dmaengine: jz4780: Replace uint32_t with u32 Replace the uint32_t type used all over dma-jz4780.c with the equivalent Linux type: u32. Signed-off-by: Paul Cercueil Link: https://lore.kernel.org/r/20211206174259.68133-6-paul@crapouillou.net Signed-off-by: Vinod Koul --- drivers/dma/dma-jz4780.c | 38 +++++++++++++++++++------------------- 1 file changed, 19 insertions(+), 19 deletions(-) diff --git a/drivers/dma/dma-jz4780.c b/drivers/dma/dma-jz4780.c index bcd21d7a559d..c8c4bbd57d14 100644 --- a/drivers/dma/dma-jz4780.c +++ b/drivers/dma/dma-jz4780.c @@ -104,10 +104,10 @@ * descriptor base address in the upper 8 bits. */ struct jz4780_dma_hwdesc { - uint32_t dcm; - uint32_t dsa; - uint32_t dta; - uint32_t dtc; + u32 dcm; + u32 dsa; + u32 dta; + u32 dtc; }; /* Size of allocations for hardware descriptor blocks. */ @@ -122,7 +122,7 @@ struct jz4780_dma_desc { dma_addr_t desc_phys; unsigned int count; enum dma_transaction_type type; - uint32_t status; + u32 status; }; struct jz4780_dma_chan { @@ -130,8 +130,8 @@ struct jz4780_dma_chan { unsigned int id; struct dma_pool *desc_pool; - uint32_t transfer_type; - uint32_t transfer_shift; + u32 transfer_type; + u32 transfer_shift; struct dma_slave_config config; struct jz4780_dma_desc *desc; @@ -152,12 +152,12 @@ struct jz4780_dma_dev { unsigned int irq; const struct jz4780_dma_soc_data *soc_data; - uint32_t chan_reserved; + u32 chan_reserved; struct jz4780_dma_chan chan[]; }; struct jz4780_dma_filter_data { - uint32_t transfer_type; + u32 transfer_type; int channel; }; @@ -179,26 +179,26 @@ static inline struct jz4780_dma_dev *jz4780_dma_chan_parent( dma_device); } -static inline uint32_t jz4780_dma_chn_readl(struct jz4780_dma_dev *jzdma, +static inline u32 jz4780_dma_chn_readl(struct jz4780_dma_dev *jzdma, unsigned int chn, unsigned int reg) { return readl(jzdma->chn_base + reg + JZ_DMA_REG_CHAN(chn)); } static inline void jz4780_dma_chn_writel(struct jz4780_dma_dev *jzdma, - unsigned int chn, unsigned int reg, uint32_t val) + unsigned int chn, unsigned int reg, u32 val) { writel(val, jzdma->chn_base + reg + JZ_DMA_REG_CHAN(chn)); } -static inline uint32_t jz4780_dma_ctrl_readl(struct jz4780_dma_dev *jzdma, +static inline u32 jz4780_dma_ctrl_readl(struct jz4780_dma_dev *jzdma, unsigned int reg) { return readl(jzdma->ctrl_base + reg); } static inline void jz4780_dma_ctrl_writel(struct jz4780_dma_dev *jzdma, - unsigned int reg, uint32_t val) + unsigned int reg, u32 val) { writel(val, jzdma->ctrl_base + reg); } @@ -260,8 +260,8 @@ static void jz4780_dma_desc_free(struct virt_dma_desc *vdesc) kfree(desc); } -static uint32_t jz4780_dma_transfer_size(struct jz4780_dma_chan *jzchan, - unsigned long val, uint32_t *shift) +static u32 jz4780_dma_transfer_size(struct jz4780_dma_chan *jzchan, + unsigned long val, u32 *shift) { struct jz4780_dma_dev *jzdma = jz4780_dma_chan_parent(jzchan); int ord = ffs(val) - 1; @@ -303,7 +303,7 @@ static int jz4780_dma_setup_hwdesc(struct jz4780_dma_chan *jzchan, enum dma_transfer_direction direction) { struct dma_slave_config *config = &jzchan->config; - uint32_t width, maxburst, tsz; + u32 width, maxburst, tsz; if (direction == DMA_MEM_TO_DEV) { desc->dcm = JZ_DMA_DCM_SAI; @@ -453,7 +453,7 @@ static struct dma_async_tx_descriptor *jz4780_dma_prep_dma_memcpy( { struct jz4780_dma_chan *jzchan = to_jz4780_dma_chan(chan); struct jz4780_dma_desc *desc; - uint32_t tsz; + u32 tsz; desc = jz4780_dma_desc_alloc(jzchan, 1, DMA_MEMCPY); if (!desc) @@ -670,7 +670,7 @@ static bool jz4780_dma_chan_irq(struct jz4780_dma_dev *jzdma, { const unsigned int soc_flags = jzdma->soc_data->flags; struct jz4780_dma_desc *desc = jzchan->desc; - uint32_t dcs; + u32 dcs; bool ack = true; spin_lock(&jzchan->vchan.lock); @@ -727,7 +727,7 @@ static irqreturn_t jz4780_dma_irq_handler(int irq, void *data) struct jz4780_dma_dev *jzdma = data; unsigned int nb_channels = jzdma->soc_data->nb_channels; unsigned long pending; - uint32_t dmac; + u32 dmac; int i; pending = jz4780_dma_ctrl_readl(jzdma, JZ_DMA_REG_DIRQP); From 76a096637d6381165584c6e9a21e531d1911c549 Mon Sep 17 00:00:00 2001 From: Paul Cercueil Date: Mon, 6 Dec 2021 17:42:59 +0000 Subject: [PATCH 095/493] dmaengine: jz4780: Support bidirectional I/O on one channel For some devices with only half-duplex capabilities, it doesn't make much sense to use one DMA channel per direction, as both channels will never be active at the same time. Add support for bidirectional I/O on DMA channels. The client drivers can then request a "tx-rx" DMA channel which will be used for both directions. Signed-off-by: Paul Cercueil Link: https://lore.kernel.org/r/20211206174259.68133-7-paul@crapouillou.net Signed-off-by: Vinod Koul --- drivers/dma/dma-jz4780.c | 48 ++++++++++++++++++++++++++-------------- 1 file changed, 32 insertions(+), 16 deletions(-) diff --git a/drivers/dma/dma-jz4780.c b/drivers/dma/dma-jz4780.c index c8c4bbd57d14..fc513eb2b289 100644 --- a/drivers/dma/dma-jz4780.c +++ b/drivers/dma/dma-jz4780.c @@ -122,6 +122,7 @@ struct jz4780_dma_desc { dma_addr_t desc_phys; unsigned int count; enum dma_transaction_type type; + u32 transfer_type; u32 status; }; @@ -130,7 +131,7 @@ struct jz4780_dma_chan { unsigned int id; struct dma_pool *desc_pool; - u32 transfer_type; + u32 transfer_type_tx, transfer_type_rx; u32 transfer_shift; struct dma_slave_config config; @@ -157,7 +158,7 @@ struct jz4780_dma_dev { }; struct jz4780_dma_filter_data { - u32 transfer_type; + u32 transfer_type_tx, transfer_type_rx; int channel; }; @@ -226,9 +227,10 @@ static inline void jz4780_dma_chan_disable(struct jz4780_dma_dev *jzdma, jz4780_dma_ctrl_writel(jzdma, JZ_DMA_REG_DCKEC, BIT(chn)); } -static struct jz4780_dma_desc *jz4780_dma_desc_alloc( - struct jz4780_dma_chan *jzchan, unsigned int count, - enum dma_transaction_type type) +static struct jz4780_dma_desc * +jz4780_dma_desc_alloc(struct jz4780_dma_chan *jzchan, unsigned int count, + enum dma_transaction_type type, + enum dma_transfer_direction direction) { struct jz4780_dma_desc *desc; @@ -248,6 +250,12 @@ static struct jz4780_dma_desc *jz4780_dma_desc_alloc( desc->count = count; desc->type = type; + + if (direction == DMA_DEV_TO_MEM) + desc->transfer_type = jzchan->transfer_type_rx; + else + desc->transfer_type = jzchan->transfer_type_tx; + return desc; } @@ -361,7 +369,7 @@ static struct dma_async_tx_descriptor *jz4780_dma_prep_slave_sg( unsigned int i; int err; - desc = jz4780_dma_desc_alloc(jzchan, sg_len, DMA_SLAVE); + desc = jz4780_dma_desc_alloc(jzchan, sg_len, DMA_SLAVE, direction); if (!desc) return NULL; @@ -410,7 +418,7 @@ static struct dma_async_tx_descriptor *jz4780_dma_prep_dma_cyclic( periods = buf_len / period_len; - desc = jz4780_dma_desc_alloc(jzchan, periods, DMA_CYCLIC); + desc = jz4780_dma_desc_alloc(jzchan, periods, DMA_CYCLIC, direction); if (!desc) return NULL; @@ -455,14 +463,14 @@ static struct dma_async_tx_descriptor *jz4780_dma_prep_dma_memcpy( struct jz4780_dma_desc *desc; u32 tsz; - desc = jz4780_dma_desc_alloc(jzchan, 1, DMA_MEMCPY); + desc = jz4780_dma_desc_alloc(jzchan, 1, DMA_MEMCPY, 0); if (!desc) return NULL; tsz = jz4780_dma_transfer_size(jzchan, dest | src | len, &jzchan->transfer_shift); - jzchan->transfer_type = JZ_DMA_DRT_AUTO; + desc->transfer_type = JZ_DMA_DRT_AUTO; desc->desc[0].dsa = src; desc->desc[0].dta = dest; @@ -528,7 +536,7 @@ static void jz4780_dma_begin(struct jz4780_dma_chan *jzchan) /* Set transfer type. */ jz4780_dma_chn_writel(jzdma, jzchan->id, JZ_DMA_REG_DRT, - jzchan->transfer_type); + jzchan->desc->transfer_type); /* * Set the transfer count. This is redundant for a descriptor-driven @@ -788,7 +796,8 @@ static bool jz4780_dma_filter_fn(struct dma_chan *chan, void *param) return false; } - jzchan->transfer_type = data->transfer_type; + jzchan->transfer_type_tx = data->transfer_type_tx; + jzchan->transfer_type_rx = data->transfer_type_rx; return true; } @@ -800,11 +809,17 @@ static struct dma_chan *jz4780_of_dma_xlate(struct of_phandle_args *dma_spec, dma_cap_mask_t mask = jzdma->dma_device.cap_mask; struct jz4780_dma_filter_data data; - if (dma_spec->args_count != 2) + if (dma_spec->args_count == 2) { + data.transfer_type_tx = dma_spec->args[0]; + data.transfer_type_rx = dma_spec->args[0]; + data.channel = dma_spec->args[1]; + } else if (dma_spec->args_count == 3) { + data.transfer_type_tx = dma_spec->args[0]; + data.transfer_type_rx = dma_spec->args[1]; + data.channel = dma_spec->args[2]; + } else { return NULL; - - data.transfer_type = dma_spec->args[0]; - data.channel = dma_spec->args[1]; + } if (data.channel > -1) { if (data.channel >= jzdma->soc_data->nb_channels) { @@ -822,7 +837,8 @@ static struct dma_chan *jz4780_of_dma_xlate(struct of_phandle_args *dma_spec, return NULL; } - jzdma->chan[data.channel].transfer_type = data.transfer_type; + jzdma->chan[data.channel].transfer_type_tx = data.transfer_type_tx; + jzdma->chan[data.channel].transfer_type_rx = data.transfer_type_rx; return dma_get_slave_channel( &jzdma->chan[data.channel].vchan.chan); From 0f93f2047d56d6ab93ba1ffeb30d318d0c5f52d7 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Thu, 25 Nov 2021 16:20:08 +0100 Subject: [PATCH 096/493] dt-bindings: dma: snps,dw-axi-dmac: Document optional reset "make dtbs_check": Documentation/devicetree/bindings/dma/snps,dw-axi-dmac.yaml arch/riscv/boot/dts/canaan/sipeed_maix_bit.dt.yaml: dma-controller@50000000: 'resets' does not match any of the regexes: 'pinctrl-[0-9]+' From schema: Documentation/devicetree/bindings/dma/snps,dw-axi-dmac.yaml The Synopsys DesignWare AXI DMA Controller on the Canaan K210 SoC exposes its reset signal. Signed-off-by: Geert Uytterhoeven Acked-by: Rob Herring Reviewed-by: Damien Le Moal Link: https://lore.kernel.org/r/20211125152008.162571-1-geert@linux-m68k.org Signed-off-by: Vinod Koul --- Documentation/devicetree/bindings/dma/snps,dw-axi-dmac.yaml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/Documentation/devicetree/bindings/dma/snps,dw-axi-dmac.yaml b/Documentation/devicetree/bindings/dma/snps,dw-axi-dmac.yaml index 79e241498e25..4324a94b26b2 100644 --- a/Documentation/devicetree/bindings/dma/snps,dw-axi-dmac.yaml +++ b/Documentation/devicetree/bindings/dma/snps,dw-axi-dmac.yaml @@ -53,6 +53,9 @@ properties: minimum: 1 maximum: 8 + resets: + maxItems: 1 + snps,dma-masters: description: | Number of AXI masters supported by the hardware. From 92452a72ebdf1225aa37690d3648f2af6d0b4fca Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Tue, 26 Oct 2021 14:45:34 -0700 Subject: [PATCH 097/493] dmaengine: idxd: set defaults for wq configs Add default values for wq size, max_xfer_size and max_batch_size. These values should provide a general guidance for the wq configuration when the user does not specify any specific values. Signed-off-by: Dave Jiang Link: https://lore.kernel.org/r/163528473483.3926048.7950067926287180976.stgit@djiang5-desk3.ch.intel.com Signed-off-by: Vinod Koul --- drivers/dma/idxd/device.c | 13 +++++-------- drivers/dma/idxd/idxd.h | 4 ++++ drivers/dma/idxd/init.c | 4 ++-- 3 files changed, 11 insertions(+), 10 deletions(-) diff --git a/drivers/dma/idxd/device.c b/drivers/dma/idxd/device.c index 1dc5245107df..36e213a8108d 100644 --- a/drivers/dma/idxd/device.c +++ b/drivers/dma/idxd/device.c @@ -390,6 +390,8 @@ static void idxd_wq_disable_cleanup(struct idxd_wq *wq) clear_bit(WQ_FLAG_DEDICATED, &wq->flags); clear_bit(WQ_FLAG_BLOCK_ON_FAULT, &wq->flags); memset(wq->name, 0, WQ_NAME_SIZE); + wq->max_xfer_bytes = WQ_DEFAULT_MAX_XFER; + wq->max_batch_size = WQ_DEFAULT_MAX_BATCH; } static void idxd_wq_ref_release(struct percpu_ref *ref) @@ -839,15 +841,12 @@ static int idxd_wq_config_write(struct idxd_wq *wq) wq->wqcfg->bits[i] = ioread32(idxd->reg_base + wq_offset); } + if (wq->size == 0 && wq->type != IDXD_WQT_NONE) + wq->size = WQ_DEFAULT_QUEUE_DEPTH; + /* byte 0-3 */ wq->wqcfg->wq_size = wq->size; - if (wq->size == 0) { - idxd->cmd_status = IDXD_SCMD_WQ_NO_SIZE; - dev_warn(dev, "Incorrect work queue size: 0\n"); - return -EINVAL; - } - /* bytes 4-7 */ wq->wqcfg->wq_thresh = wq->threshold; @@ -993,8 +992,6 @@ static int idxd_wqs_setup(struct idxd_device *idxd) if (!wq->group) continue; - if (!wq->size) - continue; if (wq_shared(wq) && !device_swq_supported(idxd)) { idxd->cmd_status = IDXD_SCMD_WQ_NO_SWQ_SUPPORT; diff --git a/drivers/dma/idxd/idxd.h b/drivers/dma/idxd/idxd.h index 51e79201636c..89e98d69115b 100644 --- a/drivers/dma/idxd/idxd.h +++ b/drivers/dma/idxd/idxd.h @@ -150,6 +150,10 @@ struct idxd_cdev { #define WQ_NAME_SIZE 1024 #define WQ_TYPE_SIZE 10 +#define WQ_DEFAULT_QUEUE_DEPTH 16 +#define WQ_DEFAULT_MAX_XFER SZ_2M +#define WQ_DEFAULT_MAX_BATCH 32 + enum idxd_op_type { IDXD_OP_BLOCK = 0, IDXD_OP_NONBLOCK = 1, diff --git a/drivers/dma/idxd/init.c b/drivers/dma/idxd/init.c index 912839bf0be3..94ecd4bf0f0e 100644 --- a/drivers/dma/idxd/init.c +++ b/drivers/dma/idxd/init.c @@ -246,8 +246,8 @@ static int idxd_setup_wqs(struct idxd_device *idxd) init_waitqueue_head(&wq->err_queue); init_completion(&wq->wq_dead); init_completion(&wq->wq_resurrect); - wq->max_xfer_bytes = idxd->max_xfer_bytes; - wq->max_batch_size = idxd->max_batch_size; + wq->max_xfer_bytes = WQ_DEFAULT_MAX_XFER; + wq->max_batch_size = WQ_DEFAULT_MAX_BATCH; wq->wqcfg = kzalloc_node(idxd->wqcfg_size, GFP_KERNEL, dev_to_node(dev)); if (!wq->wqcfg) { put_device(conf_dev); From 7930d85535751bc8b05c6731c6b79d874671f13c Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Mon, 29 Nov 2021 10:19:38 -0700 Subject: [PATCH 098/493] dmaengine: idxd: add knob for enqcmds retries Add a sysfs knob to allow tuning of retries for the kernel ENQCMDS descriptor submission. While on host, it is not as likely that ENQCMDS return busy during normal operations due to the driver controlling the number of descriptors allocated for submission. However, when the driver is operating as a guest driver, the chance of retry goes up significantly due to sharing a wq with multiple VMs. A default value is provided with the system admin being able to tune the value on a per WQ basis. Suggested-by: Sanjay Kumar Signed-off-by: Dave Jiang Link: https://lore.kernel.org/r/163820629464.2702134.7577370098568297574.stgit@djiang5-desk3.ch.intel.com Signed-off-by: Vinod Koul --- .../ABI/stable/sysfs-driver-dma-idxd | 7 ++++ drivers/dma/idxd/device.c | 1 + drivers/dma/idxd/idxd.h | 5 +++ drivers/dma/idxd/init.c | 1 + drivers/dma/idxd/irq.c | 2 +- drivers/dma/idxd/submit.c | 31 ++++++++++++---- drivers/dma/idxd/sysfs.c | 36 +++++++++++++++++++ 7 files changed, 75 insertions(+), 8 deletions(-) diff --git a/Documentation/ABI/stable/sysfs-driver-dma-idxd b/Documentation/ABI/stable/sysfs-driver-dma-idxd index df4afbccf037..4d3a23eb05b9 100644 --- a/Documentation/ABI/stable/sysfs-driver-dma-idxd +++ b/Documentation/ABI/stable/sysfs-driver-dma-idxd @@ -220,6 +220,13 @@ Contact: dmaengine@vger.kernel.org Description: Show the current number of entries in this WQ if WQ Occupancy Support bit WQ capabilities is 1. +What: /sys/bus/dsa/devices/wq./enqcmds_retries +Date Oct 29, 2021 +KernelVersion: 5.17.0 +Contact: dmaengine@vger.kernel.org +Description: Indicate the number of retires for an enqcmds submission on a shared wq. + A max value to set attribute is capped at 64. + What: /sys/bus/dsa/devices/engine./group_id Date: Oct 25, 2019 KernelVersion: 5.6.0 diff --git a/drivers/dma/idxd/device.c b/drivers/dma/idxd/device.c index 36e213a8108d..5a50ee6f6881 100644 --- a/drivers/dma/idxd/device.c +++ b/drivers/dma/idxd/device.c @@ -387,6 +387,7 @@ static void idxd_wq_disable_cleanup(struct idxd_wq *wq) wq->threshold = 0; wq->priority = 0; wq->ats_dis = 0; + wq->enqcmds_retries = IDXD_ENQCMDS_RETRIES; clear_bit(WQ_FLAG_DEDICATED, &wq->flags); clear_bit(WQ_FLAG_BLOCK_ON_FAULT, &wq->flags); memset(wq->name, 0, WQ_NAME_SIZE); diff --git a/drivers/dma/idxd/idxd.h b/drivers/dma/idxd/idxd.h index 89e98d69115b..6b9bfdc557fe 100644 --- a/drivers/dma/idxd/idxd.h +++ b/drivers/dma/idxd/idxd.h @@ -52,6 +52,9 @@ enum idxd_type { #define IDXD_NAME_SIZE 128 #define IDXD_PMU_EVENT_MAX 64 +#define IDXD_ENQCMDS_RETRIES 32 +#define IDXD_ENQCMDS_MAX_RETRIES 64 + struct idxd_device_driver { const char *name; enum idxd_dev_type *type; @@ -173,6 +176,7 @@ struct idxd_dma_chan { struct idxd_wq { void __iomem *portal; u32 portal_offset; + unsigned int enqcmds_retries; struct percpu_ref wq_active; struct completion wq_dead; struct completion wq_resurrect; @@ -584,6 +588,7 @@ int idxd_wq_init_percpu_ref(struct idxd_wq *wq); int idxd_submit_desc(struct idxd_wq *wq, struct idxd_desc *desc); struct idxd_desc *idxd_alloc_desc(struct idxd_wq *wq, enum idxd_op_type optype); void idxd_free_desc(struct idxd_wq *wq, struct idxd_desc *desc); +int idxd_enqcmds(struct idxd_wq *wq, void __iomem *portal, const void *desc); /* dmaengine */ int idxd_register_dma_device(struct idxd_device *idxd); diff --git a/drivers/dma/idxd/init.c b/drivers/dma/idxd/init.c index 94ecd4bf0f0e..8b3afce9ea67 100644 --- a/drivers/dma/idxd/init.c +++ b/drivers/dma/idxd/init.c @@ -248,6 +248,7 @@ static int idxd_setup_wqs(struct idxd_device *idxd) init_completion(&wq->wq_resurrect); wq->max_xfer_bytes = WQ_DEFAULT_MAX_XFER; wq->max_batch_size = WQ_DEFAULT_MAX_BATCH; + wq->enqcmds_retries = IDXD_ENQCMDS_RETRIES; wq->wqcfg = kzalloc_node(idxd->wqcfg_size, GFP_KERNEL, dev_to_node(dev)); if (!wq->wqcfg) { put_device(conf_dev); diff --git a/drivers/dma/idxd/irq.c b/drivers/dma/idxd/irq.c index a3bf3ea84587..0b0055a0ad2a 100644 --- a/drivers/dma/idxd/irq.c +++ b/drivers/dma/idxd/irq.c @@ -98,7 +98,7 @@ static void idxd_int_handle_revoke_drain(struct idxd_irq_entry *ie) if (wq_dedicated(wq)) { iosubmit_cmds512(portal, &desc, 1); } else { - rc = enqcmds(portal, &desc); + rc = idxd_enqcmds(wq, portal, &desc); /* This should not fail unless hardware failed. */ if (rc < 0) dev_warn(dev, "Failed to submit drain desc on wq %d\n", wq->id); diff --git a/drivers/dma/idxd/submit.c b/drivers/dma/idxd/submit.c index 776fa81db61d..569815a84e95 100644 --- a/drivers/dma/idxd/submit.c +++ b/drivers/dma/idxd/submit.c @@ -123,6 +123,29 @@ static void llist_abort_desc(struct idxd_wq *wq, struct idxd_irq_entry *ie, idxd_dma_complete_txd(found, IDXD_COMPLETE_ABORT, false); } +/* + * ENQCMDS typically fail when the WQ is inactive or busy. On host submission, the driver + * has better control of number of descriptors being submitted to a shared wq by limiting + * the number of driver allocated descriptors to the wq size. However, when the swq is + * exported to a guest kernel, it may be shared with multiple guest kernels. This means + * the likelihood of getting busy returned on the swq when submitting goes significantly up. + * Having a tunable retry mechanism allows the driver to keep trying for a bit before giving + * up. The sysfs knob can be tuned by the system administrator. + */ +int idxd_enqcmds(struct idxd_wq *wq, void __iomem *portal, const void *desc) +{ + int rc, retries = 0; + + do { + rc = enqcmds(portal, desc); + if (rc == 0) + break; + cpu_relax(); + } while (retries++ < wq->enqcmds_retries); + + return rc; +} + int idxd_submit_desc(struct idxd_wq *wq, struct idxd_desc *desc) { struct idxd_device *idxd = wq->idxd; @@ -166,13 +189,7 @@ int idxd_submit_desc(struct idxd_wq *wq, struct idxd_desc *desc) if (wq_dedicated(wq)) { iosubmit_cmds512(portal, desc->hw, 1); } else { - /* - * It's not likely that we would receive queue full rejection - * since the descriptor allocation gates at wq size. If we - * receive a -EAGAIN, that means something went wrong such as the - * device is not accepting descriptor at all. - */ - rc = enqcmds(portal, desc->hw); + rc = idxd_enqcmds(wq, portal, desc->hw); if (rc < 0) { percpu_ref_put(&wq->wq_active); /* abort operation frees the descriptor */ diff --git a/drivers/dma/idxd/sysfs.c b/drivers/dma/idxd/sysfs.c index 90857e776273..c0fec88ff6c1 100644 --- a/drivers/dma/idxd/sysfs.c +++ b/drivers/dma/idxd/sysfs.c @@ -945,6 +945,41 @@ static ssize_t wq_occupancy_show(struct device *dev, struct device_attribute *at static struct device_attribute dev_attr_wq_occupancy = __ATTR(occupancy, 0444, wq_occupancy_show, NULL); +static ssize_t wq_enqcmds_retries_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct idxd_wq *wq = confdev_to_wq(dev); + + if (wq_dedicated(wq)) + return -EOPNOTSUPP; + + return sysfs_emit(buf, "%u\n", wq->enqcmds_retries); +} + +static ssize_t wq_enqcmds_retries_store(struct device *dev, struct device_attribute *attr, + const char *buf, size_t count) +{ + struct idxd_wq *wq = confdev_to_wq(dev); + int rc; + unsigned int retries; + + if (wq_dedicated(wq)) + return -EOPNOTSUPP; + + rc = kstrtouint(buf, 10, &retries); + if (rc < 0) + return rc; + + if (retries > IDXD_ENQCMDS_MAX_RETRIES) + retries = IDXD_ENQCMDS_MAX_RETRIES; + + wq->enqcmds_retries = retries; + return count; +} + +static struct device_attribute dev_attr_wq_enqcmds_retries = + __ATTR(enqcmds_retries, 0644, wq_enqcmds_retries_show, wq_enqcmds_retries_store); + static struct attribute *idxd_wq_attributes[] = { &dev_attr_wq_clients.attr, &dev_attr_wq_state.attr, @@ -961,6 +996,7 @@ static struct attribute *idxd_wq_attributes[] = { &dev_attr_wq_max_batch_size.attr, &dev_attr_wq_ats_disable.attr, &dev_attr_wq_occupancy.attr, + &dev_attr_wq_enqcmds_retries.attr, NULL, }; From d697e83125950f64210b267d5c9c13fa7e4a43b9 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Sat, 4 Dec 2021 14:00:32 +0000 Subject: [PATCH 099/493] dmaengine: stm32-mdma: Remove redundant initialization of pointer hwdesc The pointer hwdesc is being initialized with a value that is never read, it is being updated later in a for-loop. The assignment is redundant and can be removed. Signed-off-by: Colin Ian King Reviewed-by: Amelie Delaunay Link: https://lore.kernel.org/r/20211204140032.548066-1-colin.i.king@gmail.com Signed-off-by: Vinod Koul --- drivers/dma/stm32-mdma.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/dma/stm32-mdma.c b/drivers/dma/stm32-mdma.c index d30a4a28d3bf..805a449ff301 100644 --- a/drivers/dma/stm32-mdma.c +++ b/drivers/dma/stm32-mdma.c @@ -1279,7 +1279,7 @@ static size_t stm32_mdma_desc_residue(struct stm32_mdma_chan *chan, u32 curr_hwdesc) { struct stm32_mdma_device *dmadev = stm32_mdma_get_dev(chan); - struct stm32_mdma_hwdesc *hwdesc = desc->node[0].hwdesc; + struct stm32_mdma_hwdesc *hwdesc; u32 cbndtr, residue, modulo, burst_size; int i; From de8f2c05754a7df12634c73640a162a610795a80 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Mon, 22 Nov 2021 16:54:04 +0100 Subject: [PATCH 100/493] dmaengine: stm32-mdma: Use bitfield helpers Use the FIELD_{GET,PREP}() helpers, instead of defining custom macros implementing the same operations. Signed-off-by: Geert Uytterhoeven Link: https://lore.kernel.org/r/36ceab242a594233dc7dc6f1dddb4ac32d1e846f.1637593297.git.geert+renesas@glider.be Signed-off-by: Vinod Koul --- drivers/dma/stm32-mdma.c | 74 +++++++++++++--------------------------- 1 file changed, 23 insertions(+), 51 deletions(-) diff --git a/drivers/dma/stm32-mdma.c b/drivers/dma/stm32-mdma.c index 805a449ff301..76cf2e333e63 100644 --- a/drivers/dma/stm32-mdma.c +++ b/drivers/dma/stm32-mdma.c @@ -10,6 +10,7 @@ * Inspired by stm32-dma.c and dma-jz4780.c */ +#include #include #include #include @@ -32,13 +33,6 @@ #include "virt-dma.h" -/* MDMA Generic getter/setter */ -#define STM32_MDMA_SHIFT(n) (ffs(n) - 1) -#define STM32_MDMA_SET(n, mask) (((n) << STM32_MDMA_SHIFT(mask)) & \ - (mask)) -#define STM32_MDMA_GET(n, mask) (((n) & (mask)) >> \ - STM32_MDMA_SHIFT(mask)) - #define STM32_MDMA_GISR0 0x0000 /* MDMA Int Status Reg 1 */ #define STM32_MDMA_GISR1 0x0004 /* MDMA Int Status Reg 2 */ @@ -80,8 +74,7 @@ #define STM32_MDMA_CCR_HEX BIT(13) #define STM32_MDMA_CCR_BEX BIT(12) #define STM32_MDMA_CCR_PL_MASK GENMASK(7, 6) -#define STM32_MDMA_CCR_PL(n) STM32_MDMA_SET(n, \ - STM32_MDMA_CCR_PL_MASK) +#define STM32_MDMA_CCR_PL(n) FIELD_PREP(STM32_MDMA_CCR_PL_MASK, (n)) #define STM32_MDMA_CCR_TCIE BIT(5) #define STM32_MDMA_CCR_BTIE BIT(4) #define STM32_MDMA_CCR_BRTIE BIT(3) @@ -99,48 +92,33 @@ #define STM32_MDMA_CTCR_BWM BIT(31) #define STM32_MDMA_CTCR_SWRM BIT(30) #define STM32_MDMA_CTCR_TRGM_MSK GENMASK(29, 28) -#define STM32_MDMA_CTCR_TRGM(n) STM32_MDMA_SET((n), \ - STM32_MDMA_CTCR_TRGM_MSK) -#define STM32_MDMA_CTCR_TRGM_GET(n) STM32_MDMA_GET((n), \ - STM32_MDMA_CTCR_TRGM_MSK) +#define STM32_MDMA_CTCR_TRGM(n) FIELD_PREP(STM32_MDMA_CTCR_TRGM_MSK, (n)) +#define STM32_MDMA_CTCR_TRGM_GET(n) FIELD_GET(STM32_MDMA_CTCR_TRGM_MSK, (n)) #define STM32_MDMA_CTCR_PAM_MASK GENMASK(27, 26) -#define STM32_MDMA_CTCR_PAM(n) STM32_MDMA_SET(n, \ - STM32_MDMA_CTCR_PAM_MASK) +#define STM32_MDMA_CTCR_PAM(n) FIELD_PREP(STM32_MDMA_CTCR_PAM_MASK, (n)) #define STM32_MDMA_CTCR_PKE BIT(25) #define STM32_MDMA_CTCR_TLEN_MSK GENMASK(24, 18) -#define STM32_MDMA_CTCR_TLEN(n) STM32_MDMA_SET((n), \ - STM32_MDMA_CTCR_TLEN_MSK) -#define STM32_MDMA_CTCR_TLEN_GET(n) STM32_MDMA_GET((n), \ - STM32_MDMA_CTCR_TLEN_MSK) +#define STM32_MDMA_CTCR_TLEN(n) FIELD_PREP(STM32_MDMA_CTCR_TLEN_MSK, (n)) +#define STM32_MDMA_CTCR_TLEN_GET(n) FIELD_GET(STM32_MDMA_CTCR_TLEN_MSK, (n)) #define STM32_MDMA_CTCR_LEN2_MSK GENMASK(25, 18) -#define STM32_MDMA_CTCR_LEN2(n) STM32_MDMA_SET((n), \ - STM32_MDMA_CTCR_LEN2_MSK) -#define STM32_MDMA_CTCR_LEN2_GET(n) STM32_MDMA_GET((n), \ - STM32_MDMA_CTCR_LEN2_MSK) +#define STM32_MDMA_CTCR_LEN2(n) FIELD_PREP(STM32_MDMA_CTCR_LEN2_MSK, (n)) +#define STM32_MDMA_CTCR_LEN2_GET(n) FIELD_GET(STM32_MDMA_CTCR_LEN2_MSK, (n)) #define STM32_MDMA_CTCR_DBURST_MASK GENMASK(17, 15) -#define STM32_MDMA_CTCR_DBURST(n) STM32_MDMA_SET(n, \ - STM32_MDMA_CTCR_DBURST_MASK) +#define STM32_MDMA_CTCR_DBURST(n) FIELD_PREP(STM32_MDMA_CTCR_DBURST_MASK, (n)) #define STM32_MDMA_CTCR_SBURST_MASK GENMASK(14, 12) -#define STM32_MDMA_CTCR_SBURST(n) STM32_MDMA_SET(n, \ - STM32_MDMA_CTCR_SBURST_MASK) +#define STM32_MDMA_CTCR_SBURST(n) FIELD_PREP(STM32_MDMA_CTCR_SBURST_MASK, (n)) #define STM32_MDMA_CTCR_DINCOS_MASK GENMASK(11, 10) -#define STM32_MDMA_CTCR_DINCOS(n) STM32_MDMA_SET((n), \ - STM32_MDMA_CTCR_DINCOS_MASK) +#define STM32_MDMA_CTCR_DINCOS(n) FIELD_PREP(STM32_MDMA_CTCR_DINCOS_MASK, (n)) #define STM32_MDMA_CTCR_SINCOS_MASK GENMASK(9, 8) -#define STM32_MDMA_CTCR_SINCOS(n) STM32_MDMA_SET((n), \ - STM32_MDMA_CTCR_SINCOS_MASK) +#define STM32_MDMA_CTCR_SINCOS(n) FIELD_PREP(STM32_MDMA_CTCR_SINCOS_MASK, (n)) #define STM32_MDMA_CTCR_DSIZE_MASK GENMASK(7, 6) -#define STM32_MDMA_CTCR_DSIZE(n) STM32_MDMA_SET(n, \ - STM32_MDMA_CTCR_DSIZE_MASK) +#define STM32_MDMA_CTCR_DSIZE(n) FIELD_PREP(STM32_MDMA_CTCR_DSIZE_MASK, (n)) #define STM32_MDMA_CTCR_SSIZE_MASK GENMASK(5, 4) -#define STM32_MDMA_CTCR_SSIZE(n) STM32_MDMA_SET(n, \ - STM32_MDMA_CTCR_SSIZE_MASK) +#define STM32_MDMA_CTCR_SSIZE(n) FIELD_PREP(STM32_MDMA_CTCR_SSIZE_MASK, (n)) #define STM32_MDMA_CTCR_DINC_MASK GENMASK(3, 2) -#define STM32_MDMA_CTCR_DINC(n) STM32_MDMA_SET((n), \ - STM32_MDMA_CTCR_DINC_MASK) +#define STM32_MDMA_CTCR_DINC(n) FIELD_PREP(STM32_MDMA_CTCR_DINC_MASK, (n)) #define STM32_MDMA_CTCR_SINC_MASK GENMASK(1, 0) -#define STM32_MDMA_CTCR_SINC(n) STM32_MDMA_SET((n), \ - STM32_MDMA_CTCR_SINC_MASK) +#define STM32_MDMA_CTCR_SINC(n) FIELD_PREP(STM32_MDMA_CTCR_SINC_MASK, (n)) #define STM32_MDMA_CTCR_CFG_MASK (STM32_MDMA_CTCR_SINC_MASK \ | STM32_MDMA_CTCR_DINC_MASK \ | STM32_MDMA_CTCR_SINCOS_MASK \ @@ -151,16 +129,13 @@ /* MDMA Channel x block number of data register */ #define STM32_MDMA_CBNDTR(x) (0x54 + 0x40 * (x)) #define STM32_MDMA_CBNDTR_BRC_MK GENMASK(31, 20) -#define STM32_MDMA_CBNDTR_BRC(n) STM32_MDMA_SET(n, \ - STM32_MDMA_CBNDTR_BRC_MK) -#define STM32_MDMA_CBNDTR_BRC_GET(n) STM32_MDMA_GET((n), \ - STM32_MDMA_CBNDTR_BRC_MK) +#define STM32_MDMA_CBNDTR_BRC(n) FIELD_PREP(STM32_MDMA_CBNDTR_BRC_MK, (n)) +#define STM32_MDMA_CBNDTR_BRC_GET(n) FIELD_GET(STM32_MDMA_CBNDTR_BRC_MK, (n)) #define STM32_MDMA_CBNDTR_BRDUM BIT(19) #define STM32_MDMA_CBNDTR_BRSUM BIT(18) #define STM32_MDMA_CBNDTR_BNDT_MASK GENMASK(16, 0) -#define STM32_MDMA_CBNDTR_BNDT(n) STM32_MDMA_SET(n, \ - STM32_MDMA_CBNDTR_BNDT_MASK) +#define STM32_MDMA_CBNDTR_BNDT(n) FIELD_PREP(STM32_MDMA_CBNDTR_BNDT_MASK, (n)) /* MDMA Channel x source address register */ #define STM32_MDMA_CSAR(x) (0x58 + 0x40 * (x)) @@ -171,11 +146,9 @@ /* MDMA Channel x block repeat address update register */ #define STM32_MDMA_CBRUR(x) (0x60 + 0x40 * (x)) #define STM32_MDMA_CBRUR_DUV_MASK GENMASK(31, 16) -#define STM32_MDMA_CBRUR_DUV(n) STM32_MDMA_SET(n, \ - STM32_MDMA_CBRUR_DUV_MASK) +#define STM32_MDMA_CBRUR_DUV(n) FIELD_PREP(STM32_MDMA_CBRUR_DUV_MASK, (n)) #define STM32_MDMA_CBRUR_SUV_MASK GENMASK(15, 0) -#define STM32_MDMA_CBRUR_SUV(n) STM32_MDMA_SET(n, \ - STM32_MDMA_CBRUR_SUV_MASK) +#define STM32_MDMA_CBRUR_SUV(n) FIELD_PREP(STM32_MDMA_CBRUR_SUV_MASK, (n)) /* MDMA Channel x link address register */ #define STM32_MDMA_CLAR(x) (0x64 + 0x40 * (x)) @@ -185,8 +158,7 @@ #define STM32_MDMA_CTBR_DBUS BIT(17) #define STM32_MDMA_CTBR_SBUS BIT(16) #define STM32_MDMA_CTBR_TSEL_MASK GENMASK(7, 0) -#define STM32_MDMA_CTBR_TSEL(n) STM32_MDMA_SET(n, \ - STM32_MDMA_CTBR_TSEL_MASK) +#define STM32_MDMA_CTBR_TSEL(n) FIELD_PREP(STM32_MDMA_CTBR_TSEL_MASK, (n)) /* MDMA Channel x mask address register */ #define STM32_MDMA_CMAR(x) (0x70 + 0x40 * (x)) From d5aeba456e666c6f2c01e8b4e5bba2affabcdd09 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Tue, 23 Nov 2021 22:26:18 +0100 Subject: [PATCH 101/493] dmaengine: sh: Use bitmap_zalloc() when applicable 'shdma_slave_used' is a bitmap. So use 'bitmap_zalloc()' to simplify code, improve the semantic and avoid some open-coded arithmetic in allocator arguments. Also change the corresponding 'kfree()' into 'bitmap_free()' to keep consistency. Signed-off-by: Christophe JAILLET Link: https://lore.kernel.org/r/3efaf2784424ae3d7411dc47f8b6b03e7bb8c059.1637702701.git.christophe.jaillet@wanadoo.fr Signed-off-by: Vinod Koul --- drivers/dma/sh/shdma-base.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/dma/sh/shdma-base.c b/drivers/dma/sh/shdma-base.c index 7f72b3f4cd1a..0a2eb5f5a6e8 100644 --- a/drivers/dma/sh/shdma-base.c +++ b/drivers/dma/sh/shdma-base.c @@ -1042,9 +1042,7 @@ EXPORT_SYMBOL(shdma_cleanup); static int __init shdma_enter(void) { - shdma_slave_used = kcalloc(DIV_ROUND_UP(slave_num, BITS_PER_LONG), - sizeof(long), - GFP_KERNEL); + shdma_slave_used = bitmap_zalloc(slave_num, GFP_KERNEL); if (!shdma_slave_used) return -ENOMEM; return 0; @@ -1053,7 +1051,7 @@ module_init(shdma_enter); static void __exit shdma_exit(void) { - kfree(shdma_slave_used); + bitmap_free(shdma_slave_used); } module_exit(shdma_exit); From ff8752d7617da301ad3b7ef18caa58d135ee8c3c Mon Sep 17 00:00:00 2001 From: German Gomez Date: Thu, 16 Dec 2021 15:24:04 +0000 Subject: [PATCH 102/493] perf arm-spe: Synthesize SPE instruction events Synthesize instruction events for every ARM SPE record. Arm SPE implements a hardware-based sample period, and perf implements a software-based one. Add a warning message to inform the user of this. Signed-off-by: German Gomez Tested-by: Leo Yan Acked-by: Namhyung Kim Cc: Alexander Shishkin Cc: Jiri Olsa Cc: John Garry Cc: Mark Rutland Cc: Mathieu Poirier Cc: Will Deacon Cc: linux-arm-kernel@lists.infradead.org Link: https://lore.kernel.org/r/20211216152404.52474-1-german.gomez@arm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/arm-spe.c | 62 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 62 insertions(+) diff --git a/tools/perf/util/arm-spe.c b/tools/perf/util/arm-spe.c index 8a3828f86901..d2b64e3f588b 100644 --- a/tools/perf/util/arm-spe.c +++ b/tools/perf/util/arm-spe.c @@ -58,6 +58,8 @@ struct arm_spe { u8 sample_branch; u8 sample_remote_access; u8 sample_memory; + u8 sample_instructions; + u64 instructions_sample_period; u64 l1d_miss_id; u64 l1d_access_id; @@ -68,6 +70,7 @@ struct arm_spe { u64 branch_miss_id; u64 remote_access_id; u64 memory_id; + u64 instructions_id; u64 kernel_start; @@ -90,6 +93,7 @@ struct arm_spe_queue { u64 time; u64 timestamp; struct thread *thread; + u64 period_instructions; }; static void arm_spe_dump(struct arm_spe *spe __maybe_unused, @@ -202,6 +206,7 @@ static struct arm_spe_queue *arm_spe__alloc_queue(struct arm_spe *spe, speq->pid = -1; speq->tid = -1; speq->cpu = -1; + speq->period_instructions = 0; /* params set */ params.get_trace = arm_spe_get_trace; @@ -353,6 +358,35 @@ static int arm_spe__synth_branch_sample(struct arm_spe_queue *speq, return arm_spe_deliver_synth_event(spe, speq, event, &sample); } +static int arm_spe__synth_instruction_sample(struct arm_spe_queue *speq, + u64 spe_events_id, u64 data_src) +{ + struct arm_spe *spe = speq->spe; + struct arm_spe_record *record = &speq->decoder->record; + union perf_event *event = speq->event_buf; + struct perf_sample sample = { .ip = 0, }; + + /* + * Handles perf instruction sampling period. + */ + speq->period_instructions++; + if (speq->period_instructions < spe->instructions_sample_period) + return 0; + speq->period_instructions = 0; + + arm_spe_prep_sample(spe, speq, event, &sample); + + sample.id = spe_events_id; + sample.stream_id = spe_events_id; + sample.addr = record->virt_addr; + sample.phys_addr = record->phys_addr; + sample.data_src = data_src; + sample.period = spe->instructions_sample_period; + sample.weight = record->latency; + + return arm_spe_deliver_synth_event(spe, speq, event, &sample); +} + #define SPE_MEM_TYPE (ARM_SPE_L1D_ACCESS | ARM_SPE_L1D_MISS | \ ARM_SPE_LLC_ACCESS | ARM_SPE_LLC_MISS | \ ARM_SPE_REMOTE_ACCESS) @@ -482,6 +516,12 @@ static int arm_spe_sample(struct arm_spe_queue *speq) return err; } + if (spe->sample_instructions) { + err = arm_spe__synth_instruction_sample(speq, spe->instructions_id, data_src); + if (err) + return err; + } + return 0; } @@ -1110,8 +1150,30 @@ arm_spe_synth_events(struct arm_spe *spe, struct perf_session *session) return err; spe->memory_id = id; arm_spe_set_event_name(evlist, id, "memory"); + id += 1; } + if (spe->synth_opts.instructions) { + if (spe->synth_opts.period_type != PERF_ITRACE_PERIOD_INSTRUCTIONS) { + pr_warning("Only instruction-based sampling period is currently supported by Arm SPE.\n"); + goto synth_instructions_out; + } + if (spe->synth_opts.period > 1) + pr_warning("Arm SPE has a hardware-based sample period.\n" + "Additional instruction events will be discarded by --itrace\n"); + + spe->sample_instructions = true; + attr.config = PERF_COUNT_HW_INSTRUCTIONS; + attr.sample_period = spe->synth_opts.period; + spe->instructions_sample_period = attr.sample_period; + err = arm_spe_synth_event(session, &attr, id); + if (err) + return err; + spe->instructions_id = id; + arm_spe_set_event_name(evlist, id, "instructions"); + } +synth_instructions_out: + return 0; } From 73cca71a903202cddc8279fc76b2da4995da5bea Mon Sep 17 00:00:00 2001 From: Dario Binacchi Date: Sun, 12 Dec 2021 21:14:35 -0800 Subject: [PATCH 103/493] Input: ti_am335x_tsc - set ADCREFM for X configuration As reported by the STEPCONFIG[1-16] registered field descriptions of the TI reference manual, for the ADC "in single ended, SEL_INM_SWC_3_0 must be 1xxx". Unlike the Y and Z coordinates, this bit has not been set for the step configuration registers used to sample the X coordinate. Fixes: 1b8be32e6914 ("Input: add support for TI Touchscreen controller") Signed-off-by: Dario Binacchi Link: https://lore.kernel.org/r/20211212125358.14416-2-dariobin@libero.it Signed-off-by: Dmitry Torokhov --- drivers/input/touchscreen/ti_am335x_tsc.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/input/touchscreen/ti_am335x_tsc.c b/drivers/input/touchscreen/ti_am335x_tsc.c index 83e685557a19..fd3ffdd23470 100644 --- a/drivers/input/touchscreen/ti_am335x_tsc.c +++ b/drivers/input/touchscreen/ti_am335x_tsc.c @@ -131,7 +131,8 @@ static void titsc_step_config(struct titsc *ts_dev) u32 stepenable; config = STEPCONFIG_MODE_HWSYNC | - STEPCONFIG_AVG_16 | ts_dev->bit_xp; + STEPCONFIG_AVG_16 | ts_dev->bit_xp | + STEPCONFIG_INM_ADCREFM; switch (ts_dev->wires) { case 4: config |= STEPCONFIG_INP(ts_dev->inp_yp) | ts_dev->bit_xn; From 6bfeb6c21e1bdc11c328b7d996d20f0f73c6b9b0 Mon Sep 17 00:00:00 2001 From: Dario Binacchi Date: Sun, 12 Dec 2021 21:14:48 -0800 Subject: [PATCH 104/493] Input: ti_am335x_tsc - fix STEPCONFIG setup for Z2 The Z2 step configuration doesn't erase the SEL_INP_SWC_3_0 bit-field before setting the ADC channel. This way its value could be corrupted by the ADC channel selected for the Z1 coordinate. Fixes: 8c896308feae ("input: ti_am335x_adc: use only FIFO0 and clean up a little") Signed-off-by: Dario Binacchi Link: https://lore.kernel.org/r/20211212125358.14416-3-dariobin@libero.it Signed-off-by: Dmitry Torokhov --- drivers/input/touchscreen/ti_am335x_tsc.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/input/touchscreen/ti_am335x_tsc.c b/drivers/input/touchscreen/ti_am335x_tsc.c index fd3ffdd23470..cfc943423241 100644 --- a/drivers/input/touchscreen/ti_am335x_tsc.c +++ b/drivers/input/touchscreen/ti_am335x_tsc.c @@ -196,7 +196,10 @@ static void titsc_step_config(struct titsc *ts_dev) STEPCONFIG_OPENDLY); end_step++; - config |= STEPCONFIG_INP(ts_dev->inp_yn); + config = STEPCONFIG_MODE_HWSYNC | + STEPCONFIG_AVG_16 | ts_dev->bit_yp | + ts_dev->bit_xn | STEPCONFIG_INM_ADCREFM | + STEPCONFIG_INP(ts_dev->inp_yn); titsc_writel(ts_dev, REG_STEPCONFIG(end_step), config); titsc_writel(ts_dev, REG_STEPDELAY(end_step), STEPCONFIG_OPENDLY); From 23dee6c6b183e41fa7e3d758e70216f670851a3f Mon Sep 17 00:00:00 2001 From: Dario Binacchi Date: Sun, 12 Dec 2021 21:15:01 -0800 Subject: [PATCH 105/493] Input: ti_am335x_tsc - lower the X and Y sampling time The open delay time has to be applied only on the first sample of the X/Y coordinates because on the following samples the ADC channel is not changed. Removing this time from the samples after the first one, "ti,coordinate-readouts" greater than 1, decreases the total acquisition time, allowing to increase the number of acquired coordinates in the time unit. Signed-off-by: Dario Binacchi Link: https://lore.kernel.org/r/20211212125358.14416-4-dariobin@libero.it Signed-off-by: Dmitry Torokhov --- drivers/input/touchscreen/ti_am335x_tsc.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/drivers/input/touchscreen/ti_am335x_tsc.c b/drivers/input/touchscreen/ti_am335x_tsc.c index cfc943423241..f4ef218bc1b8 100644 --- a/drivers/input/touchscreen/ti_am335x_tsc.c +++ b/drivers/input/touchscreen/ti_am335x_tsc.c @@ -126,7 +126,7 @@ static int titsc_config_wires(struct titsc *ts_dev) static void titsc_step_config(struct titsc *ts_dev) { unsigned int config; - int i; + int i, n; int end_step, first_step, tsc_steps; u32 stepenable; @@ -151,9 +151,11 @@ static void titsc_step_config(struct titsc *ts_dev) first_step = TOTAL_STEPS - tsc_steps; /* Steps 16 to 16-coordinate_readouts is for X */ end_step = first_step + tsc_steps; + n = 0; for (i = end_step - ts_dev->coordinate_readouts; i < end_step; i++) { titsc_writel(ts_dev, REG_STEPCONFIG(i), config); - titsc_writel(ts_dev, REG_STEPDELAY(i), STEPCONFIG_OPENDLY); + titsc_writel(ts_dev, REG_STEPDELAY(i), + n++ == 0 ? STEPCONFIG_OPENDLY : 0); } config = 0; @@ -175,9 +177,11 @@ static void titsc_step_config(struct titsc *ts_dev) /* 1 ... coordinate_readouts is for Y */ end_step = first_step + ts_dev->coordinate_readouts; + n = 0; for (i = first_step; i < end_step; i++) { titsc_writel(ts_dev, REG_STEPCONFIG(i), config); - titsc_writel(ts_dev, REG_STEPDELAY(i), STEPCONFIG_OPENDLY); + titsc_writel(ts_dev, REG_STEPDELAY(i), + n++ == 0 ? STEPCONFIG_OPENDLY : 0); } /* Make CHARGECONFIG same as IDLECONFIG */ From 53b90bd9767007b87610a46b7d013123742d2802 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Sun, 12 Dec 2021 21:20:31 -0800 Subject: [PATCH 106/493] Input: ucb1400_ts - remove redundant variable penup Variable penup is assigned a value but penup is never read later, it is redundant and can be removed. Signed-off-by: Colin Ian King Link: https://lore.kernel.org/r/20211205000525.153999-1-colin.i.king@gmail.com Signed-off-by: Dmitry Torokhov --- drivers/input/touchscreen/ucb1400_ts.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/input/touchscreen/ucb1400_ts.c b/drivers/input/touchscreen/ucb1400_ts.c index e3f2c940ef3d..dfd3b35590c3 100644 --- a/drivers/input/touchscreen/ucb1400_ts.c +++ b/drivers/input/touchscreen/ucb1400_ts.c @@ -186,7 +186,6 @@ static irqreturn_t ucb1400_irq(int irqnr, void *devid) { struct ucb1400_ts *ucb = devid; unsigned int x, y, p; - bool penup; if (unlikely(irqnr != ucb->irq)) return IRQ_NONE; @@ -196,8 +195,7 @@ static irqreturn_t ucb1400_irq(int irqnr, void *devid) /* Start with a small delay before checking pendown state */ msleep(UCB1400_TS_POLL_PERIOD); - while (!ucb->stopped && !(penup = ucb1400_ts_pen_up(ucb))) { - + while (!ucb->stopped && !ucb1400_ts_pen_up(ucb)) { ucb1400_adc_enable(ucb->ac97); x = ucb1400_ts_read_xpos(ucb); y = ucb1400_ts_read_ypos(ucb); From 652c0441de588dafb68516eccd4a89662830d23b Mon Sep 17 00:00:00 2001 From: Xiang wangx Date: Sun, 19 Dec 2021 18:06:45 -0800 Subject: [PATCH 107/493] Input: byd - fix typo in a comment The double `the' in a comment is repeated, thus it should be removed. Signed-off-by: Xiang wangx Link: https://lore.kernel.org/r/20211216082735.11948-1-wangxiang@cdjrlc.com Signed-off-by: Dmitry Torokhov --- drivers/input/mouse/byd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/input/mouse/byd.c b/drivers/input/mouse/byd.c index 6e0c5f5a2713..221a553f45cd 100644 --- a/drivers/input/mouse/byd.c +++ b/drivers/input/mouse/byd.c @@ -191,7 +191,7 @@ /* * The touchpad generates a mixture of absolute and relative packets, indicated - * by the the last byte of each packet being set to one of the following: + * by the last byte of each packet being set to one of the following: */ #define BYD_PACKET_ABSOLUTE 0xf8 #define BYD_PACKET_RELATIVE 0x00 From b0229605b1436438f24d9a97d8ecf220e0ea5900 Mon Sep 17 00:00:00 2001 From: Julien Massot Date: Thu, 16 Dec 2021 17:06:53 +0100 Subject: [PATCH 108/493] remoteproc: rcar_rproc: Fix pm_runtime_get_sync error check pm_runtime_get_sync can also return 1 on success, change to use pm_runtime_resume_and_get which return 0 only on success. This bug has been discovered by Dan Carpenter by using Smatch static checker. Fixes: 285892a74f13 ("remoteproc: Add Renesas rcar driver") Signed-off-by: Julien Massot Reviewed-by: Geert Uytterhoeven [Fixed blank line between tags] Link: https://lore.kernel.org/r/20211216160653.203768-1-julien.massot@iot.bzh Signed-off-by: Mathieu Poirier --- drivers/remoteproc/rcar_rproc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/remoteproc/rcar_rproc.c b/drivers/remoteproc/rcar_rproc.c index 34fd867f9f8c..3408c6e51a7c 100644 --- a/drivers/remoteproc/rcar_rproc.c +++ b/drivers/remoteproc/rcar_rproc.c @@ -167,7 +167,7 @@ static int rcar_rproc_probe(struct platform_device *pdev) } pm_runtime_enable(dev); - ret = pm_runtime_get_sync(dev); + ret = pm_runtime_resume_and_get(dev); if (ret) { dev_err(dev, "failed to power up\n"); return ret; From ba635863779871a4f873511199f3e0ae84d5e592 Mon Sep 17 00:00:00 2001 From: Julien Massot Date: Thu, 16 Dec 2021 17:07:21 +0100 Subject: [PATCH 109/493] remoteproc: rcar_rproc: Remove trailing semicolon Remove trailing semicolon. Signed-off-by: Julien Massot Link: https://lore.kernel.org/r/20211216160721.203794-1-julien.massot@iot.bzh Reviewed-by: Geert Uytterhoeven Signed-off-by: Mathieu Poirier --- drivers/remoteproc/rcar_rproc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/remoteproc/rcar_rproc.c b/drivers/remoteproc/rcar_rproc.c index 3408c6e51a7c..aa86154109c7 100644 --- a/drivers/remoteproc/rcar_rproc.c +++ b/drivers/remoteproc/rcar_rproc.c @@ -163,7 +163,7 @@ static int rcar_rproc_probe(struct platform_device *pdev) if (IS_ERR(priv->rst)) { ret = PTR_ERR(priv->rst); dev_err_probe(dev, ret, "fail to acquire rproc reset\n"); - return ret;; + return ret; } pm_runtime_enable(dev); From f8464e084dd3c4cf37bdbeb06fea0afbd2e0f4e8 Mon Sep 17 00:00:00 2001 From: Carsten Haitzler Date: Wed, 15 Dec 2021 16:03:54 +0000 Subject: [PATCH 110/493] perf test: Use 3 digits for test numbering now we can have more tests This is in preparation for adding more tests that will need the test number to be 3 digts so they align nicely in the output. Reviewed-by: Leo Yan Signed-off-by: Carsten Haitzler Cc: Mathieu Poirier Cc: Mike Leach Cc: Suzuki Poulouse Cc: coresight@lists.linaro.org Link: http://lore.kernel.org/lkml/20211215160403.69264-3-carsten.haitzler@foss.arm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/builtin-test.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/tools/perf/tests/builtin-test.c b/tools/perf/tests/builtin-test.c index 1fb9f2a11d63..fac3717d9ba1 100644 --- a/tools/perf/tests/builtin-test.c +++ b/tools/perf/tests/builtin-test.c @@ -421,7 +421,7 @@ static int run_shell_tests(int argc, const char *argv[], int i, int width, continue; st.file = ent->d_name; - pr_info("%2d: %-*s:", i, width, test_suite.desc); + pr_info("%3d: %-*s:", i, width, test_suite.desc); if (intlist__find(skiplist, i)) { color_fprintf(stderr, PERF_COLOR_YELLOW, " Skip (user override)\n"); @@ -471,7 +471,7 @@ static int __cmd_test(int argc, const char *argv[], struct intlist *skiplist) continue; } - pr_info("%2d: %-*s:", i, width, test_description(t, -1)); + pr_info("%3d: %-*s:", i, width, test_description(t, -1)); if (intlist__find(skiplist, i)) { color_fprintf(stderr, PERF_COLOR_YELLOW, " Skip (user override)\n"); @@ -511,7 +511,7 @@ static int __cmd_test(int argc, const char *argv[], struct intlist *skiplist) curr, argc, argv)) continue; - pr_info("%2d.%1d: %-*s:", i, subi + 1, subw, + pr_info("%3d.%1d: %-*s:", i, subi + 1, subw, test_description(t, subi)); test_and_print(t, subi); } @@ -546,7 +546,7 @@ static int perf_test__list_shell(int argc, const char **argv, int i) if (!perf_test__matches(t.desc, curr, argc, argv)) continue; - pr_info("%2d: %s\n", i, t.desc); + pr_info("%3d: %s\n", i, t.desc); } @@ -568,14 +568,14 @@ static int perf_test__list(int argc, const char **argv) if (!perf_test__matches(test_description(t, -1), curr, argc, argv)) continue; - pr_info("%2d: %s\n", i, test_description(t, -1)); + pr_info("%3d: %s\n", i, test_description(t, -1)); if (has_subtests(t)) { int subn = num_subtests(t); int subi; for (subi = 0; subi < subn; subi++) - pr_info("%2d:%1d: %s\n", i, subi + 1, + pr_info("%3d:%1d: %s\n", i, subi + 1, test_description(t, subi)); } } From 7248e308a57587615431b83689cd57e957815bfc Mon Sep 17 00:00:00 2001 From: Alexandre Truong Date: Fri, 17 Dec 2021 15:45:15 +0000 Subject: [PATCH 111/493] perf tools: Record ARM64 LR register automatically On ARM64, automatically record the link register if the frame pointer mode is on. It will be used to do a dwarf unwind to find the caller of the leaf frame if the frame pointer was omitted. Reviewed-by: James Clark Signed-off-by: Alexandre Truong Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: John Garry Cc: Leo Yan Cc: Mark Rutland Cc: Mathieu Poirier Cc: Namhyung Kim Cc: Will Deacon Cc: linux-arm-kernel@lists.infradead.org Link: https://lore.kernel.org/r/20211217154521.80603-2-german.gomez@arm.com Signed-off-by: German Gomez Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/arch/arm64/util/machine.c | 7 +++++++ tools/perf/builtin-record.c | 8 ++++++++ tools/perf/util/callchain.h | 2 ++ 3 files changed, 17 insertions(+) diff --git a/tools/perf/arch/arm64/util/machine.c b/tools/perf/arch/arm64/util/machine.c index 7e7714290a87..d2ce31e28cd7 100644 --- a/tools/perf/arch/arm64/util/machine.c +++ b/tools/perf/arch/arm64/util/machine.c @@ -5,6 +5,8 @@ #include #include "debug.h" #include "symbol.h" +#include "callchain.h" +#include "record.h" /* On arm64, kernel text segment starts at high memory address, * for example 0xffff 0000 8xxx xxxx. Modules start at a low memory @@ -26,3 +28,8 @@ void arch__symbols__fixup_end(struct symbol *p, struct symbol *c) p->end = c->start; pr_debug4("%s sym:%s end:%#" PRIx64 "\n", __func__, p->name, p->end); } + +void arch__add_leaf_frame_record_opts(struct record_opts *opts) +{ + opts->sample_user_regs |= sample_reg_masks[PERF_REG_ARM64_LR].mask; +} diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 0338b813585a..6ac2160913ea 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -2267,6 +2267,10 @@ out_free: return ret; } +void __weak arch__add_leaf_frame_record_opts(struct record_opts *opts __maybe_unused) +{ +} + static int parse_control_option(const struct option *opt, const char *str, int unset __maybe_unused) @@ -2898,6 +2902,10 @@ int cmd_record(int argc, const char **argv) } rec->opts.target.hybrid = perf_pmu__has_hybrid(); + + if (callchain_param.enabled && callchain_param.record_mode == CALLCHAIN_FP) + arch__add_leaf_frame_record_opts(&rec->opts); + err = -ENOMEM; if (evlist__create_maps(rec->evlist, &rec->opts.target) < 0) usage_with_options(record_usage, record_options); diff --git a/tools/perf/util/callchain.h b/tools/perf/util/callchain.h index 5824134f983b..77fba053c677 100644 --- a/tools/perf/util/callchain.h +++ b/tools/perf/util/callchain.h @@ -280,6 +280,8 @@ static inline int arch_skip_callchain_idx(struct thread *thread __maybe_unused, } #endif +void arch__add_leaf_frame_record_opts(struct record_opts *opts); + char *callchain_list__sym_name(struct callchain_list *cl, char *bf, size_t bfsize, bool show_dso); char *callchain_node__scnprintf_value(struct callchain_node *node, From 32bfa5bf71db672c646751da131a17aace8cceac Mon Sep 17 00:00:00 2001 From: Alexandre Truong Date: Fri, 17 Dec 2021 15:45:16 +0000 Subject: [PATCH 112/493] perf machine: Add a mechanism to inject stack frames Add a mechanism for platforms to inject stack frames for the leaf frame caller if there is enough information to determine a frame is missing from dwarf or other post processing mechanisms. Reviewed-by: James Clark Signed-off-by: Alexandre Truong Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: John Garry Cc: Leo Yan Cc: Mark Rutland Cc: Mathieu Poirier Cc: Namhyung Kim Cc: Will Deacon Cc: linux-arm-kernel@lists.infradead.org Link: https://lore.kernel.org/r/20211217154521.80603-3-german.gomez@arm.com Signed-off-by: German Gomez Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/machine.c | 37 ++++++++++++++++++++++++++++++++++++- 1 file changed, 36 insertions(+), 1 deletion(-) diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index fb8496df8432..3eddad009f78 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c @@ -2710,6 +2710,12 @@ static int find_prev_cpumode(struct ip_callchain *chain, struct thread *thread, return err; } +static u64 get_leaf_frame_caller(struct perf_sample *sample __maybe_unused, + struct thread *thread __maybe_unused, int usr_idx __maybe_unused) +{ + return 0; +} + static int thread__resolve_callchain_sample(struct thread *thread, struct callchain_cursor *cursor, struct evsel *evsel, @@ -2723,9 +2729,10 @@ static int thread__resolve_callchain_sample(struct thread *thread, struct ip_callchain *chain = sample->callchain; int chain_nr = 0; u8 cpumode = PERF_RECORD_MISC_USER; - int i, j, err, nr_entries; + int i, j, err, nr_entries, usr_idx; int skip_idx = -1; int first_call = 0; + u64 leaf_frame_caller; if (chain) chain_nr = chain->nr; @@ -2850,6 +2857,34 @@ check_calls: continue; } + /* + * PERF_CONTEXT_USER allows us to locate where the user stack ends. + * Depending on callchain_param.order and the position of PERF_CONTEXT_USER, + * the index will be different in order to add the missing frame + * at the right place. + */ + + usr_idx = callchain_param.order == ORDER_CALLEE ? j-2 : j-1; + + if (usr_idx >= 0 && chain->ips[usr_idx] == PERF_CONTEXT_USER) { + + leaf_frame_caller = get_leaf_frame_caller(sample, thread, usr_idx); + + /* + * check if leaf_frame_Caller != ip to not add the same + * value twice. + */ + + if (leaf_frame_caller && leaf_frame_caller != ip) { + + err = add_callchain_ip(thread, cursor, parent, + root_al, &cpumode, leaf_frame_caller, + false, NULL, NULL, 0); + if (err) + return (err < 0) ? err : 0; + } + } + err = add_callchain_ip(thread, cursor, parent, root_al, &cpumode, ip, false, NULL, NULL, 0); From ab23692134489f0f563168449fc27bfb5d6b04dd Mon Sep 17 00:00:00 2001 From: Alexandre Truong Date: Fri, 17 Dec 2021 15:45:17 +0000 Subject: [PATCH 113/493] perf script: Use callchain_param_setup() instead of open coded equivalent Refactoring script__setup_sample_type() by using callchain_param_setup() to replace the duplicate code for callchain parameter setting up. Reviewed-by: James Clark Signed-off-by: Alexandre Truong Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: John Garry Cc: Leo Yan Cc: Mark Rutland Cc: Mathieu Poirier Cc: Namhyung Kim Cc: Will Deacon Cc: linux-arm-kernel@lists.infradead.org Link: https://lore.kernel.org/r/20211217154521.80603-4-german.gomez@arm.com Signed-off-by: German Gomez Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-script.c | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index da2175d70ac9..ab7d575f97f2 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -3468,16 +3468,7 @@ static void script__setup_sample_type(struct perf_script *script) struct perf_session *session = script->session; u64 sample_type = evlist__combined_sample_type(session->evlist); - if (symbol_conf.use_callchain || symbol_conf.cumulate_callchain) { - if ((sample_type & PERF_SAMPLE_REGS_USER) && - (sample_type & PERF_SAMPLE_STACK_USER)) { - callchain_param.record_mode = CALLCHAIN_DWARF; - dwarf_callchain_users = true; - } else if (sample_type & PERF_SAMPLE_BRANCH_STACK) - callchain_param.record_mode = CALLCHAIN_LBR; - else - callchain_param.record_mode = CALLCHAIN_FP; - } + callchain_param_setup(sample_type); if (script->stitch_lbr && (callchain_param.record_mode != CALLCHAIN_LBR)) { pr_warning("Can't find LBR callchain. Switch off --stitch-lbr.\n" From aa8db3e41dae953b636ff68e944479900d149a37 Mon Sep 17 00:00:00 2001 From: Alexandre Truong Date: Fri, 17 Dec 2021 15:45:18 +0000 Subject: [PATCH 114/493] perf callchain: Enable dwarf_callchain_users on arm64 Enable dwarf_callchain_users on arm64 which will be needed to do a DWARF unwind in order to get the caller of the leaf frame. Reviewed-by: James Clark Signed-off-by: Alexandre Truong Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: John Garry Cc: Leo Yan Cc: Mark Rutland Cc: Mathieu Poirier Cc: Namhyung Kim Cc: Will Deacon Cc: linux-arm-kernel@lists.infradead.org Link: https://lore.kernel.org/r/20211217154521.80603-5-german.gomez@arm.com Signed-off-by: German Gomez Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-report.c | 4 ++-- tools/perf/builtin-script.c | 4 ++-- tools/perf/util/callchain.c | 14 +++++++++++++- tools/perf/util/callchain.h | 2 +- 4 files changed, 18 insertions(+), 6 deletions(-) diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 8ae400429870..1dd92d8c9279 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -410,7 +410,7 @@ static int report__setup_sample_type(struct report *rep) } } - callchain_param_setup(sample_type); + callchain_param_setup(sample_type, perf_env__arch(&rep->session->header.env)); if (rep->stitch_lbr && (callchain_param.record_mode != CALLCHAIN_LBR)) { ui__warning("Can't find LBR callchain. Switch off --stitch-lbr.\n" @@ -1127,7 +1127,7 @@ static int process_attr(struct perf_tool *tool __maybe_unused, * on events sample_type. */ sample_type = evlist__combined_sample_type(*pevlist); - callchain_param_setup(sample_type); + callchain_param_setup(sample_type, perf_env__arch((*pevlist)->env)); return 0; } diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index ab7d575f97f2..d308adfd1176 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -2318,7 +2318,7 @@ static int process_attr(struct perf_tool *tool, union perf_event *event, * on events sample_type. */ sample_type = evlist__combined_sample_type(evlist); - callchain_param_setup(sample_type); + callchain_param_setup(sample_type, perf_env__arch((*pevlist)->env)); /* Enable fields for callchain entries */ if (symbol_conf.use_callchain && @@ -3468,7 +3468,7 @@ static void script__setup_sample_type(struct perf_script *script) struct perf_session *session = script->session; u64 sample_type = evlist__combined_sample_type(session->evlist); - callchain_param_setup(sample_type); + callchain_param_setup(sample_type, perf_env__arch(session->machines.host.env)); if (script->stitch_lbr && (callchain_param.record_mode != CALLCHAIN_LBR)) { pr_warning("Can't find LBR callchain. Switch off --stitch-lbr.\n" diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c index 8e2777133bd9..131207b91d15 100644 --- a/tools/perf/util/callchain.c +++ b/tools/perf/util/callchain.c @@ -1600,7 +1600,7 @@ void callchain_cursor_reset(struct callchain_cursor *cursor) map__zput(node->ms.map); } -void callchain_param_setup(u64 sample_type) +void callchain_param_setup(u64 sample_type, const char *arch) { if (symbol_conf.use_callchain || symbol_conf.cumulate_callchain) { if ((sample_type & PERF_SAMPLE_REGS_USER) && @@ -1612,6 +1612,18 @@ void callchain_param_setup(u64 sample_type) else callchain_param.record_mode = CALLCHAIN_FP; } + + /* + * It's necessary to use libunwind to reliably determine the caller of + * a leaf function on aarch64, as otherwise we cannot know whether to + * start from the LR or FP. + * + * Always starting from the LR can result in duplicate or entirely + * erroneous entries. Always skipping the LR and starting from the FP + * can result in missing entries. + */ + if (callchain_param.record_mode == CALLCHAIN_FP && !strcmp(arch, "arm64")) + dwarf_callchain_users = true; } static bool chain_match(struct callchain_list *base_chain, diff --git a/tools/perf/util/callchain.h b/tools/perf/util/callchain.h index 77fba053c677..d95615daed73 100644 --- a/tools/perf/util/callchain.h +++ b/tools/perf/util/callchain.h @@ -300,7 +300,7 @@ int callchain_branch_counts(struct callchain_root *root, u64 *branch_count, u64 *predicted_count, u64 *abort_count, u64 *cycles_count); -void callchain_param_setup(u64 sample_type); +void callchain_param_setup(u64 sample_type, const char *arch); bool callchain_cnode_matched(struct callchain_node *base_cnode, struct callchain_node *pair_cnode); From ffc60350489db9f6e3010ac1e795078cb0d06efe Mon Sep 17 00:00:00 2001 From: German Gomez Date: Fri, 17 Dec 2021 15:45:19 +0000 Subject: [PATCH 115/493] perf tools: Refactor SMPL_REG macro in perf_regs.h Refactor the SAMPL_REG macro so that it can be used in a followup commit to obtain the masks for ARM64 registers. Reviewed-by: James Clark Signed-off-by: German Gomez Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: John Garry Cc: Leo Yan Cc: Mark Rutland Cc: Mathieu Poirier Cc: Namhyung Kim Cc: Will Deacon Cc: linux-arm-kernel@lists.infradead.org Link: https://lore.kernel.org/r/20211217154521.80603-6-german.gomez@arm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/perf_regs.h | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/tools/perf/util/perf_regs.h b/tools/perf/util/perf_regs.h index 4e6b1299c571..ce1127af05e4 100644 --- a/tools/perf/util/perf_regs.h +++ b/tools/perf/util/perf_regs.h @@ -11,8 +11,11 @@ struct sample_reg { const char *name; uint64_t mask; }; -#define SMPL_REG(n, b) { .name = #n, .mask = 1ULL << (b) } -#define SMPL_REG2(n, b) { .name = #n, .mask = 3ULL << (b) } + +#define SMPL_REG_MASK(b) (1ULL << (b)) +#define SMPL_REG(n, b) { .name = #n, .mask = SMPL_REG_MASK(b) } +#define SMPL_REG2_MASK(b) (3ULL << (b)) +#define SMPL_REG2(n, b) { .name = #n, .mask = SMPL_REG2_MASK(b) } #define SMPL_REG_END { .name = NULL } enum { From b9f6fbb3b2c29736970ae9fcc0e82b0bd459442b Mon Sep 17 00:00:00 2001 From: Alexandre Truong Date: Fri, 17 Dec 2021 15:45:20 +0000 Subject: [PATCH 116/493] perf arm64: Inject missing frames when using 'perf record --call-graph=fp' When unwinding using frame pointers on ARM64, the return address of the current function may not have been pushed into the stack when a function was interrupted, which makes perf show an incorrect call graph to the user. Consider the following example program: void leaf() { /* long computation */ } void parent() { // (1) leaf(); // (2) } ... could be compiled into (using gcc -fno-inline -fno-omit-frame-pointer): leaf: /* long computation */ nop ret parent: // (1) stp x29, x30, [sp, -16]! mov x29, sp bl parent nop ldp x29, x30, [sp], 16 // (2) ret If the program is interrupted at (1), (2), or any point in "leaf:", the call graph will skip the callers of the current function. We can unwind using the dwarf info and check if the return addr is the same as the LR register, and inject the missing frame into the call graph. Before this patch, the above example shows the following call-graph when recording using "--call-graph fp" mode in ARM64: # Children Self Command Shared Object Symbol # ........ ........ ........ ................ ...................... # 99.86% 99.86% program3 program3 [.] leaf | ---_start __libc_start_main main leaf As can be seen, the "parent" function is missing. This is specially problematic in "leaf" because for leaf functions the compiler may always omit pushing the return addr into the stack. After this patch, it shows the correct graph: # Children Self Command Shared Object Symbol # ........ ........ ........ ................ ...................... # 99.86% 99.86% program3 program3 [.] leaf | ---_start __libc_start_main main parent leaf Reviewed-by: James Clark Signed-off-by: Alexandre Truong Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: John Garry Cc: Leo Yan Cc: Mark Rutland Cc: Mathieu Poirier Cc: Namhyung Kim Cc: Will Deacon Cc: linux-arm-kernel@lists.infradead.org Link: https://lore.kernel.org/r/20211217154521.80603-7-german.gomez@arm.com Signed-off-by: German Gomez [ Rename machine__normalize_is() to machine__normalized_is(), as suggested by James Clark ] Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/Build | 1 + .../util/arm64-frame-pointer-unwind-support.c | 63 +++++++++++++++++++ .../util/arm64-frame-pointer-unwind-support.h | 10 +++ tools/perf/util/machine.c | 19 ++++-- tools/perf/util/machine.h | 1 + 5 files changed, 89 insertions(+), 5 deletions(-) create mode 100644 tools/perf/util/arm64-frame-pointer-unwind-support.c create mode 100644 tools/perf/util/arm64-frame-pointer-unwind-support.h diff --git a/tools/perf/util/Build b/tools/perf/util/Build index 294b12430d73..2a403cefcaf2 100644 --- a/tools/perf/util/Build +++ b/tools/perf/util/Build @@ -1,3 +1,4 @@ +perf-y += arm64-frame-pointer-unwind-support.o perf-y += annotate.o perf-y += block-info.o perf-y += block-range.o diff --git a/tools/perf/util/arm64-frame-pointer-unwind-support.c b/tools/perf/util/arm64-frame-pointer-unwind-support.c new file mode 100644 index 000000000000..4f5ecf51ed38 --- /dev/null +++ b/tools/perf/util/arm64-frame-pointer-unwind-support.c @@ -0,0 +1,63 @@ +// SPDX-License-Identifier: GPL-2.0 +#include "arm64-frame-pointer-unwind-support.h" +#include "callchain.h" +#include "event.h" +#include "perf_regs.h" // SMPL_REG_MASK +#include "unwind.h" + +#define perf_event_arm_regs perf_event_arm64_regs +#include "../arch/arm64/include/uapi/asm/perf_regs.h" +#undef perf_event_arm_regs + +struct entries { + u64 stack[2]; + size_t length; +}; + +static bool get_leaf_frame_caller_enabled(struct perf_sample *sample) +{ + return callchain_param.record_mode == CALLCHAIN_FP && sample->user_regs.regs + && sample->user_regs.mask & SMPL_REG_MASK(PERF_REG_ARM64_LR); +} + +static int add_entry(struct unwind_entry *entry, void *arg) +{ + struct entries *entries = arg; + + entries->stack[entries->length++] = entry->ip; + return 0; +} + +u64 get_leaf_frame_caller_aarch64(struct perf_sample *sample, struct thread *thread, int usr_idx) +{ + int ret; + struct entries entries = {}; + struct regs_dump old_regs = sample->user_regs; + + if (!get_leaf_frame_caller_enabled(sample)) + return 0; + + /* + * If PC and SP are not recorded, get the value of PC from the stack + * and set its mask. SP is not used when doing the unwinding but it + * still needs to be set to prevent failures. + */ + + if (!(sample->user_regs.mask & SMPL_REG_MASK(PERF_REG_ARM64_PC))) { + sample->user_regs.cache_mask |= SMPL_REG_MASK(PERF_REG_ARM64_PC); + sample->user_regs.cache_regs[PERF_REG_ARM64_PC] = sample->callchain->ips[usr_idx+1]; + } + + if (!(sample->user_regs.mask & SMPL_REG_MASK(PERF_REG_ARM64_SP))) { + sample->user_regs.cache_mask |= SMPL_REG_MASK(PERF_REG_ARM64_SP); + sample->user_regs.cache_regs[PERF_REG_ARM64_SP] = 0; + } + + ret = unwind__get_entries(add_entry, &entries, thread, sample, 2); + sample->user_regs = old_regs; + + if (ret || entries.length != 2) + return ret; + + return callchain_param.order == ORDER_CALLER ? entries.stack[0] : entries.stack[1]; +} diff --git a/tools/perf/util/arm64-frame-pointer-unwind-support.h b/tools/perf/util/arm64-frame-pointer-unwind-support.h new file mode 100644 index 000000000000..32af9ce94398 --- /dev/null +++ b/tools/perf/util/arm64-frame-pointer-unwind-support.h @@ -0,0 +1,10 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __PERF_ARM_FRAME_POINTER_UNWIND_SUPPORT_H +#define __PERF_ARM_FRAME_POINTER_UNWIND_SUPPORT_H + +#include "event.h" +#include "thread.h" + +u64 get_leaf_frame_caller_aarch64(struct perf_sample *sample, struct thread *thread, int user_idx); + +#endif /* __PERF_ARM_FRAME_POINTER_UNWIND_SUPPORT_H */ diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index 3eddad009f78..3901440aeff9 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c @@ -34,6 +34,7 @@ #include "bpf-event.h" #include // page_size #include "cgroup.h" +#include "arm64-frame-pointer-unwind-support.h" #include #include @@ -2710,10 +2711,13 @@ static int find_prev_cpumode(struct ip_callchain *chain, struct thread *thread, return err; } -static u64 get_leaf_frame_caller(struct perf_sample *sample __maybe_unused, - struct thread *thread __maybe_unused, int usr_idx __maybe_unused) +static u64 get_leaf_frame_caller(struct perf_sample *sample, + struct thread *thread, int usr_idx) { - return 0; + if (machine__normalized_is(thread->maps->machine, "arm64")) + return get_leaf_frame_caller_aarch64(sample, thread, usr_idx); + else + return 0; } static int thread__resolve_callchain_sample(struct thread *thread, @@ -3114,14 +3118,19 @@ int machine__set_current_tid(struct machine *machine, int cpu, pid_t pid, } /* - * Compares the raw arch string. N.B. see instead perf_env__arch() if a - * normalized arch is needed. + * Compares the raw arch string. N.B. see instead perf_env__arch() or + * machine__normalized_is() if a normalized arch is needed. */ bool machine__is(struct machine *machine, const char *arch) { return machine && !strcmp(perf_env__raw_arch(machine->env), arch); } +bool machine__normalized_is(struct machine *machine, const char *arch) +{ + return machine && !strcmp(perf_env__arch(machine->env), arch); +} + int machine__nr_cpus_avail(struct machine *machine) { return machine ? perf_env__nr_cpus_avail(machine->env) : 0; diff --git a/tools/perf/util/machine.h b/tools/perf/util/machine.h index a143087eeb47..c5a45dc8df4c 100644 --- a/tools/perf/util/machine.h +++ b/tools/perf/util/machine.h @@ -208,6 +208,7 @@ static inline bool machine__is_host(struct machine *machine) } bool machine__is(struct machine *machine, const char *arch); +bool machine__normalized_is(struct machine *machine, const char *arch); int machine__nr_cpus_avail(struct machine *machine); struct thread *__machine__findnew_thread(struct machine *machine, pid_t pid, pid_t tid); From 7fbddf40b881a2430daf1bd03ba80e871a2fadce Mon Sep 17 00:00:00 2001 From: Kajol Jain Date: Mon, 6 Dec 2021 14:47:47 +0530 Subject: [PATCH 117/493] tools headers UAPI: Add new macros for mem_hops field to perf_event.h Add new macros for mem_hops field which can be used to represent remote-node, socket and board level details. Currently the code had macro for HOPS_0 which, corresponds to data coming from another core but same node. Add new macros for HOPS_1 to HOPS_3 to represent remote-node, socket and board level data. Also add corresponding strings in the mem_hops array to represent mem_hop field data in perf_mem__lvl_scnprintf function Incase mem_hops field is used, PERF_MEM_LVLNUM field also need to be set inorder to represent the data source. Hence printing data source via PERF_MEM_LVL field can be skip in that scenario. For ex: Encodings for mem_hops fields with L2 cache: L2 - local L2 L2 | REMOTE | HOPS_0 - remote core, same node L2 L2 | REMOTE | HOPS_1 - remote node, same socket L2 L2 | REMOTE | HOPS_2 - remote socket, same board L2 L2 | REMOTE | HOPS_3 - remote board L2 Signed-off-by: Kajol Jain Acked-by: Peter Zijlstra (Intel) Cc: Alexander Shishkin Cc: Alexei Starovoitov Cc: Andi Kleen Cc: Athira Jajeev Cc: Daniel Borkmann Cc: Jin Yao Cc: Jiri Olsa Cc: Kan Liang Cc: Madhavan Srinivasan Cc: Mark Rutland Cc: Michael Ellerman Cc: Nageswara R Sastry Cc: Namhyung Kim Cc: Paul Mackerras Cc: Song Liu Cc: linuxppc-dev@lists.ozlabs.org Link: http://lore.kernel.org/lkml/20211206091749.87585-3-kjain@linux.ibm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/include/uapi/linux/perf_event.h | 5 ++++- tools/perf/util/mem-events.c | 29 +++++++++++++++++---------- 2 files changed, 22 insertions(+), 12 deletions(-) diff --git a/tools/include/uapi/linux/perf_event.h b/tools/include/uapi/linux/perf_event.h index bd8860eeb291..4cd39aaccbe7 100644 --- a/tools/include/uapi/linux/perf_event.h +++ b/tools/include/uapi/linux/perf_event.h @@ -1332,7 +1332,10 @@ union perf_mem_data_src { /* hop level */ #define PERF_MEM_HOPS_0 0x01 /* remote core, same node */ -/* 2-7 available */ +#define PERF_MEM_HOPS_1 0x02 /* remote node, same socket */ +#define PERF_MEM_HOPS_2 0x03 /* remote socket, same board */ +#define PERF_MEM_HOPS_3 0x04 /* remote board */ +/* 5-7 available */ #define PERF_MEM_HOPS_SHIFT 43 #define PERF_MEM_S(a, s) \ diff --git a/tools/perf/util/mem-events.c b/tools/perf/util/mem-events.c index 3167b4628b6d..ed0ab838bcc5 100644 --- a/tools/perf/util/mem-events.c +++ b/tools/perf/util/mem-events.c @@ -309,6 +309,9 @@ static const char * const mem_hops[] = { * to be set with mem_hops field. */ "core, same node", + "node, same socket", + "socket, same board", + "board", }; int perf_mem__lvl_scnprintf(char *out, size_t sz, struct mem_info *mem_info) @@ -316,7 +319,7 @@ int perf_mem__lvl_scnprintf(char *out, size_t sz, struct mem_info *mem_info) size_t i, l = 0; u64 m = PERF_MEM_LVL_NA; u64 hit, miss; - int printed; + int printed = 0; if (mem_info) m = mem_info->data_src.mem_lvl; @@ -335,18 +338,22 @@ int perf_mem__lvl_scnprintf(char *out, size_t sz, struct mem_info *mem_info) l += 7; } - if (mem_info && mem_info->data_src.mem_hops) + /* + * Incase mem_hops field is set, we can skip printing data source via + * PERF_MEM_LVL namespace. + */ + if (mem_info && mem_info->data_src.mem_hops) { l += scnprintf(out + l, sz - l, "%s ", mem_hops[mem_info->data_src.mem_hops]); - - printed = 0; - for (i = 0; m && i < ARRAY_SIZE(mem_lvl); i++, m >>= 1) { - if (!(m & 0x1)) - continue; - if (printed++) { - strcat(out, " or "); - l += 4; + } else { + for (i = 0; m && i < ARRAY_SIZE(mem_lvl); i++, m >>= 1) { + if (!(m & 0x1)) + continue; + if (printed++) { + strcat(out, " or "); + l += 4; + } + l += scnprintf(out + l, sz - l, mem_lvl[i]); } - l += scnprintf(out + l, sz - l, mem_lvl[i]); } if (mem_info && mem_info->data_src.mem_lvl_num) { From 0ebce3d65f1f53c936fdd51e975bd876ba7ed64f Mon Sep 17 00:00:00 2001 From: Kajol Jain Date: Mon, 6 Dec 2021 14:47:48 +0530 Subject: [PATCH 118/493] perf powerpc: Add encodings to represent data based on newer composite PERF_MEM_LVLNUM* fields The code represent data coming from L1/L2/L3 cache hits based on PERF_MEM_LVL_* namespace, which is in the process of deprecation in the favour of newer composite PERF_MEM_{LVLNUM_,REMOTE_,SNOOPX_,HOPS_} fields. Add data source encodings to represent L1/L2/L3 cache hits based on newer composite PERF_MEM_{LVLNUM_,REMOTE_,SNOOPX_,HOPS_} fields for power10 and older platforms Result in power9 system without patch changes: localhost:# ./perf mem report --sort="mem,sym,dso" --stdio # Overhead Samples Memory access Symbol Shared Object # ........ ....... ............. ................................. ................ # 29.51% 1 L2 hit [k] perf_event_exec [kernel.vmlinux] 27.05% 1 L1 hit [k] perf_ctx_unlock [kernel.vmlinux] 13.93% 1 L1 hit [k] vtime_delta [kernel.vmlinux] 13.11% 1 L1 hit [k] prepend_path.isra.11 [kernel.vmlinux] 8.20% 1 L1 hit [.] 00000038.plt_call.__GI_strlen libc-2.28.so 8.20% 1 L1 hit [k] perf_event_interrupt [kernel.vmlinux] Result in power9 system with patch changes: localhost:# ./perf mem report --sort="mem,sym,dso" --stdio # Overhead Samples Memory access Symbol Shared Object # ........ ....... ............. .......................... ................ # 36.63% 1 L2 or L2 hit [k] perf_event_exec [kernel.vmlinux] 25.50% 1 L1 or L1 hit [k] vtime_delta [kernel.vmlinux] 13.12% 1 L1 or L1 hit [k] unmap_region [kernel.vmlinux] 12.62% 1 L1 or L1 hit [k] perf_sample_event_took [kernel.vmlinux] 6.93% 1 L1 or L1 hit [k] perf_ctx_unlock [kernel.vmlinux] 5.20% 1 L1 or L1 hit [.] __memcpy_power7 libc-2.28.so Reviewed-by: Madhavan Srinivasan Signed-off-by: Kajol Jain Cc: Alexander Shishkin Cc: Alexei Starovoitov Cc: Andi Kleen Cc: Athira Jajeev Cc: Daniel Borkmann Cc: Jin Yao Cc: Jiri Olsa Cc: Kan Liang Cc: Mark Rutland Cc: Michael Ellerman Cc: Nageswara R Sastry Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Song Liu Cc: linuxppc-dev@lists.ozlabs.org Link: http://lore.kernel.org/lkml/20211206091749.87585-4-kjain@linux.ibm.com Signed-off-by: Arnaldo Carvalho de Melo --- arch/powerpc/perf/isa207-common.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/perf/isa207-common.c b/arch/powerpc/perf/isa207-common.c index 7ea873ab2e6f..6c6bc8b7d887 100644 --- a/arch/powerpc/perf/isa207-common.c +++ b/arch/powerpc/perf/isa207-common.c @@ -220,13 +220,13 @@ static inline u64 isa207_find_source(u64 idx, u32 sub_idx) /* Nothing to do */ break; case 1: - ret = PH(LVL, L1); + ret = PH(LVL, L1) | LEVEL(L1) | P(SNOOP, HIT); break; case 2: - ret = PH(LVL, L2); + ret = PH(LVL, L2) | LEVEL(L2) | P(SNOOP, HIT); break; case 3: - ret = PH(LVL, L3); + ret = PH(LVL, L3) | LEVEL(L3) | P(SNOOP, HIT); break; case 4: if (sub_idx <= 1) From af2b24f228a0373ac65eb7a502e0bc31e2c0269d Mon Sep 17 00:00:00 2001 From: Kajol Jain Date: Mon, 6 Dec 2021 14:47:49 +0530 Subject: [PATCH 119/493] perf powerpc: Add data source encodings for power10 platform The code represent memory/cache level data based on PERF_MEM_LVL_* namespace, which is in the process of deprication in the favour of newer composite PERF_MEM_{LVLNUM_,REMOTE_,SNOOPX_,HOPS_} fields. Add data source encodings to represent cache/memory data based on newer composite PERF_MEM_{LVLNUM_,REMOTE_,SNOOPX_,HOPS_} fields. Add data source encodings to represent data coming from local memory/Remote memory/distant memory and remote/distant cache hits. Inorder to represent data coming from OpenCAPI cache/memory, we use LVLNUM "PMEM" field which is used to present persistent memory accesses. Result in power10 system with patch changes: localhost:# ./perf mem report --sort="mem,sym,dso" --stdio # Overhead Samples Memory access Symbol Shared Object # ........ ....... ...................................... ........................... ................. # 29.46% 2331 L1 or L1 hit [.] __random libc-2.28.so 23.11% 2121 L1 or L1 hit [.] producer_populate_cache producer_consumer 18.56% 1758 L1 or L1 hit [.] __random_r libc-2.28.so 15.64% 1559 L2 or L2 hit [.] __random libc-2.28.so ..... 0.09% 5 Remote socket, same board Any cache hit [.] __random libc-2.28.so 0.07% 4 Remote socket, same board Any cache hit [.] __random libc-2.28.so ..... Reviewed-by: Madhavan Srinivasan Signed-off-by: Kajol Jain Cc: Alexander Shishkin Cc: Alexei Starovoitov Cc: Andi Kleen Cc: Athira Jajeev Cc: Daniel Borkmann Cc: Jin Yao Cc: Jiri Olsa Cc: Kan Liang Cc: Mark Rutland Cc: Michael Ellerman Cc: Nageswara R Sastry Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Song Liu Cc: linuxppc-dev@lists.ozlabs.org Link: http://lore.kernel.org/lkml/20211206091749.87585-5-kjain@linux.ibm.com Signed-off-by: Arnaldo Carvalho de Melo --- arch/powerpc/perf/isa207-common.c | 54 ++++++++++++++++++++++++------- 1 file changed, 42 insertions(+), 12 deletions(-) diff --git a/arch/powerpc/perf/isa207-common.c b/arch/powerpc/perf/isa207-common.c index 6c6bc8b7d887..4037ea652522 100644 --- a/arch/powerpc/perf/isa207-common.c +++ b/arch/powerpc/perf/isa207-common.c @@ -229,13 +229,28 @@ static inline u64 isa207_find_source(u64 idx, u32 sub_idx) ret = PH(LVL, L3) | LEVEL(L3) | P(SNOOP, HIT); break; case 4: - if (sub_idx <= 1) - ret = PH(LVL, LOC_RAM); - else if (sub_idx > 1 && sub_idx <= 2) - ret = PH(LVL, REM_RAM1); - else - ret = PH(LVL, REM_RAM2); - ret |= P(SNOOP, HIT); + if (cpu_has_feature(CPU_FTR_ARCH_31)) { + ret = P(SNOOP, HIT); + + if (sub_idx == 1) + ret |= PH(LVL, LOC_RAM) | LEVEL(RAM); + else if (sub_idx == 2 || sub_idx == 3) + ret |= P(LVL, HIT) | LEVEL(PMEM); + else if (sub_idx == 4) + ret |= PH(LVL, REM_RAM1) | REM | LEVEL(RAM) | P(HOPS, 2); + else if (sub_idx == 5 || sub_idx == 7) + ret |= P(LVL, HIT) | LEVEL(PMEM) | REM; + else if (sub_idx == 6) + ret |= PH(LVL, REM_RAM2) | REM | LEVEL(RAM) | P(HOPS, 3); + } else { + if (sub_idx <= 1) + ret = PH(LVL, LOC_RAM); + else if (sub_idx > 1 && sub_idx <= 2) + ret = PH(LVL, REM_RAM1); + else + ret = PH(LVL, REM_RAM2); + ret |= P(SNOOP, HIT); + } break; case 5: if (cpu_has_feature(CPU_FTR_ARCH_31)) { @@ -261,11 +276,26 @@ static inline u64 isa207_find_source(u64 idx, u32 sub_idx) } break; case 6: - ret = PH(LVL, REM_CCE2); - if ((sub_idx == 0) || (sub_idx == 2)) - ret |= P(SNOOP, HIT); - else if ((sub_idx == 1) || (sub_idx == 3)) - ret |= P(SNOOP, HITM); + if (cpu_has_feature(CPU_FTR_ARCH_31)) { + if (sub_idx == 0) + ret = PH(LVL, REM_CCE1) | LEVEL(ANY_CACHE) | REM | + P(SNOOP, HIT) | P(HOPS, 2); + else if (sub_idx == 1) + ret = PH(LVL, REM_CCE1) | LEVEL(ANY_CACHE) | REM | + P(SNOOP, HITM) | P(HOPS, 2); + else if (sub_idx == 2) + ret = PH(LVL, REM_CCE2) | LEVEL(ANY_CACHE) | REM | + P(SNOOP, HIT) | P(HOPS, 3); + else if (sub_idx == 3) + ret = PH(LVL, REM_CCE2) | LEVEL(ANY_CACHE) | REM | + P(SNOOP, HITM) | P(HOPS, 3); + } else { + ret = PH(LVL, REM_CCE2); + if (sub_idx == 0 || sub_idx == 2) + ret |= P(SNOOP, HIT); + else if (sub_idx == 1 || sub_idx == 3) + ret |= P(SNOOP, HITM); + } break; case 7: ret = PM(LVL, L1); From 1882de7fc56c2b0ea91dd9fd9922d434fc3feb15 Mon Sep 17 00:00:00 2001 From: Chen Yu Date: Wed, 22 Dec 2021 12:31:03 +0800 Subject: [PATCH 120/493] efi: Introduce EFI_FIRMWARE_MANAGEMENT_CAPSULE_HEADER and corresponding structures Platform Firmware Runtime Update image starts with UEFI headers, and the headers are defined in UEFI specification, but some of them have not been defined in the kernel yet. For example, the header layout of a capsule file looks like this: EFI_CAPSULE_HEADER EFI_FIRMWARE_MANAGEMENT_CAPSULE_HEADER EFI_FIRMWARE_MANAGEMENT_CAPSULE_IMAGE_HEADER EFI_FIRMWARE_IMAGE_AUTHENTICATION These structures would be used by the Platform Firmware Runtime Update driver to parse the format of capsule file to verify if the corresponding version number is valid. In this way, if the user provides an invalid capsule image, the kernel could be used as a guard to reject it, without switching to the Management Mode (which might be costly). EFI_CAPSULE_HEADER has been defined in the kernel, but the other structures have not been defined yet, so do that. Besides, EFI_FIRMWARE_MANAGEMENT_CAPSULE_HEADER and EFI_FIRMWARE_MANAGEMENT_CAPSULE_IMAGE_HEADER are required to be packed in the uefi specification. For this reason, use the __packed attribute to indicate to the compiler that the entire structure can appear misaligned in memory (as suggested by Ard) in case one of them follows the other directly in a capsule header. Acked-by: Ard Biesheuvel Signed-off-by: Chen Yu Signed-off-by: Rafael J. Wysocki --- include/linux/efi.h | 46 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 46 insertions(+) diff --git a/include/linux/efi.h b/include/linux/efi.h index dbd39b20e034..80e970f7e6f8 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -148,6 +148,52 @@ typedef struct { u32 imagesize; } efi_capsule_header_t; +/* EFI_FIRMWARE_MANAGEMENT_CAPSULE_HEADER */ +struct efi_manage_capsule_header { + u32 ver; + u16 emb_drv_cnt; + u16 payload_cnt; + /* + * Variable-size array of the size given by the sum of + * emb_drv_cnt and payload_cnt. + */ + u64 offset_list[]; +} __packed; + +/* EFI_FIRMWARE_MANAGEMENT_CAPSULE_IMAGE_HEADER */ +struct efi_manage_capsule_image_header { + u32 ver; + efi_guid_t image_type_id; + u8 image_index; + u8 reserved_bytes[3]; + u32 image_size; + u32 vendor_code_size; + /* hw_ins was introduced in version 2 */ + u64 hw_ins; + /* capsule_support was introduced in version 3 */ + u64 capsule_support; +} __packed; + +/* WIN_CERTIFICATE */ +struct win_cert { + u32 len; + u16 rev; + u16 cert_type; +}; + +/* WIN_CERTIFICATE_UEFI_GUID */ +struct win_cert_uefi_guid { + struct win_cert hdr; + efi_guid_t cert_type; + u8 cert_data[]; +}; + +/* EFI_FIRMWARE_IMAGE_AUTHENTICATION */ +struct efi_image_auth { + u64 mon_count; + struct win_cert_uefi_guid auth_info; +}; + /* * EFI capsule flags */ From 0db89fa243e5edc5de38c88b369e4c3755c5fb74 Mon Sep 17 00:00:00 2001 From: Chen Yu Date: Wed, 22 Dec 2021 12:31:41 +0800 Subject: [PATCH 121/493] ACPI: Introduce Platform Firmware Runtime Update device driver Introduce the pfr_update driver which can be used for Platform Firmware Runtime code injection and driver update [1]. The user is expected to provide the EFI capsule, and pass it to the driver by writing the capsule to a device special file. The capsule is transferred by the driver to the platform firmware with the help of an ACPI _DSM method under the special ACPI Platform Firmware Runtime Update device (INTC1080), and the actual firmware update is carried out by the low-level Management Mode code in the platform firmware. This change allows certain pieces of the platform firmware to be updated on the fly while the system is running (runtime) without the need to restart it, which is key in the cases when the system needs to be available 100% of the time and it cannot afford the downtime related to restarting it, or when the work carried out by the system is particularly important, so it cannot be interrupted, and it is not practical to wait until it is complete. Link: https://uefi.org/sites/default/files/resources/Intel_MM_OS_Interface_Spec_Rev100.pdf # [1] Tested-by: Hongyu Ning Signed-off-by: Chen Yu [ rjw: Subject and changelog edits ] Signed-off-by: Rafael J. Wysocki --- .../userspace-api/ioctl/ioctl-number.rst | 1 + drivers/acpi/Kconfig | 18 + drivers/acpi/Makefile | 1 + drivers/acpi/pfr_update.c | 575 ++++++++++++++++++ include/uapi/linux/pfrut.h | 174 ++++++ 5 files changed, 769 insertions(+) create mode 100644 drivers/acpi/pfr_update.c create mode 100644 include/uapi/linux/pfrut.h diff --git a/Documentation/userspace-api/ioctl/ioctl-number.rst b/Documentation/userspace-api/ioctl/ioctl-number.rst index cfe6cccf0f44..687efcf245c1 100644 --- a/Documentation/userspace-api/ioctl/ioctl-number.rst +++ b/Documentation/userspace-api/ioctl/ioctl-number.rst @@ -367,6 +367,7 @@ Code Seq# Include File Comments 0xE5 00-3F linux/fuse.h 0xEC 00-01 drivers/platform/chrome/cros_ec_dev.h ChromeOS EC driver +0xEE 00-09 uapi/linux/pfrut.h Platform Firmware Runtime Update and Telemetry 0xF3 00-3F drivers/usb/misc/sisusbvga/sisusb.h sisfb (in development) 0xF6 all LTTng Linux Trace Toolkit Next Generation diff --git a/drivers/acpi/Kconfig b/drivers/acpi/Kconfig index cdbdf68bd98f..d0b3ca9d4a97 100644 --- a/drivers/acpi/Kconfig +++ b/drivers/acpi/Kconfig @@ -517,6 +517,24 @@ config ACPI_CONFIGFS userspace. The configurable ACPI groups will be visible under /config/acpi, assuming configfs is mounted under /config. +config ACPI_PFRUT + tristate "ACPI Platform Firmware Runtime Update and Telemetry" + depends on 64BIT + help + This mechanism allows certain pieces of the platform firmware + to be updated on the fly while the system is running (runtime) + without the need to restart it, which is key in the cases when + the system needs to be available 100% of the time and it cannot + afford the downtime related to restarting it, or when the work + carried out by the system is particularly important, so it cannot + be interrupted, and it is not practical to wait until it is complete. + + The existing firmware code can be modified (driver update) or + extended by adding new code to the firmware (code injection). + + To compile this driver as module, choose M here: + the module will be called pfr_update. + if ARM64 source "drivers/acpi/arm64/Kconfig" diff --git a/drivers/acpi/Makefile b/drivers/acpi/Makefile index 3018714e87d9..2ad2e821cc08 100644 --- a/drivers/acpi/Makefile +++ b/drivers/acpi/Makefile @@ -102,6 +102,7 @@ obj-$(CONFIG_ACPI_CPPC_LIB) += cppc_acpi.o obj-$(CONFIG_ACPI_SPCR_TABLE) += spcr.o obj-$(CONFIG_ACPI_DEBUGGER_USER) += acpi_dbg.o obj-$(CONFIG_ACPI_PPTT) += pptt.o +obj-$(CONFIG_ACPI_PFRUT) += pfr_update.o # processor has its own "processor." module_param namespace processor-y := processor_driver.o diff --git a/drivers/acpi/pfr_update.c b/drivers/acpi/pfr_update.c new file mode 100644 index 000000000000..149b5b2530b9 --- /dev/null +++ b/drivers/acpi/pfr_update.c @@ -0,0 +1,575 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * ACPI Platform Firmware Runtime Update Device driver + * + * Copyright (C) 2021 Intel Corporation + * Author: Chen Yu + * + * pfr_update driver is used for Platform Firmware Runtime + * Update, which includes the code injection and driver update. + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#define PFRU_FUNC_STANDARD_QUERY 0 +#define PFRU_FUNC_QUERY_UPDATE_CAP 1 +#define PFRU_FUNC_QUERY_BUF 2 +#define PFRU_FUNC_START 3 + +#define PFRU_CODE_INJECT_TYPE 1 +#define PFRU_DRIVER_UPDATE_TYPE 2 + +#define PFRU_REVID_1 1 +#define PFRU_REVID_2 2 +#define PFRU_DEFAULT_REV_ID PFRU_REVID_1 + +enum cap_index { + CAP_STATUS_IDX = 0, + CAP_UPDATE_IDX = 1, + CAP_CODE_TYPE_IDX = 2, + CAP_FW_VER_IDX = 3, + CAP_CODE_RT_VER_IDX = 4, + CAP_DRV_TYPE_IDX = 5, + CAP_DRV_RT_VER_IDX = 6, + CAP_DRV_SVN_IDX = 7, + CAP_PLAT_ID_IDX = 8, + CAP_OEM_ID_IDX = 9, + CAP_OEM_INFO_IDX = 10, + CAP_NR_IDX +}; + +enum buf_index { + BUF_STATUS_IDX = 0, + BUF_EXT_STATUS_IDX = 1, + BUF_ADDR_LOW_IDX = 2, + BUF_ADDR_HI_IDX = 3, + BUF_SIZE_IDX = 4, + BUF_NR_IDX +}; + +enum update_index { + UPDATE_STATUS_IDX = 0, + UPDATE_EXT_STATUS_IDX = 1, + UPDATE_AUTH_TIME_LOW_IDX = 2, + UPDATE_AUTH_TIME_HI_IDX = 3, + UPDATE_EXEC_TIME_LOW_IDX = 4, + UPDATE_EXEC_TIME_HI_IDX = 5, + UPDATE_NR_IDX +}; + +enum pfru_start_action { + START_STAGE = 0, + START_ACTIVATE = 1, + START_STAGE_ACTIVATE = 2, +}; + +struct pfru_device { + u32 rev_id, index; + struct device *parent_dev; + struct miscdevice miscdev; +}; + +static DEFINE_IDA(pfru_ida); + +/* + * Manual reference: + * https://uefi.org/sites/default/files/resources/Intel_MM_OS_Interface_Spec_Rev100.pdf + * + * pfru_guid is the parameter for _DSM method + */ +static const guid_t pfru_guid = + GUID_INIT(0xECF9533B, 0x4A3C, 0x4E89, 0x93, 0x9E, 0xC7, 0x71, + 0x12, 0x60, 0x1C, 0x6D); + +/* pfru_code_inj_guid is the UUID to identify code injection EFI capsule file */ +static const guid_t pfru_code_inj_guid = + GUID_INIT(0xB2F84B79, 0x7B6E, 0x4E45, 0x88, 0x5F, 0x3F, 0xB9, + 0xBB, 0x18, 0x54, 0x02); + +/* pfru_drv_update_guid is the UUID to identify driver update EFI capsule file */ +static const guid_t pfru_drv_update_guid = + GUID_INIT(0x4569DD8C, 0x75F1, 0x429A, 0xA3, 0xD6, 0x24, 0xDE, + 0x80, 0x97, 0xA0, 0xDF); + +static inline int pfru_valid_revid(u32 id) +{ + return id == PFRU_REVID_1 || id == PFRU_REVID_2; +} + +static inline struct pfru_device *to_pfru_dev(struct file *file) +{ + return container_of(file->private_data, struct pfru_device, miscdev); +} + +static int query_capability(struct pfru_update_cap_info *cap_hdr, + struct pfru_device *pfru_dev) +{ + acpi_handle handle = ACPI_HANDLE(pfru_dev->parent_dev); + union acpi_object *out_obj; + int ret = -EINVAL; + + out_obj = acpi_evaluate_dsm_typed(handle, &pfru_guid, + pfru_dev->rev_id, + PFRU_FUNC_QUERY_UPDATE_CAP, + NULL, ACPI_TYPE_PACKAGE); + if (!out_obj) + return ret; + + if (out_obj->package.count < CAP_NR_IDX || + out_obj->package.elements[CAP_STATUS_IDX].type != ACPI_TYPE_INTEGER || + out_obj->package.elements[CAP_UPDATE_IDX].type != ACPI_TYPE_INTEGER || + out_obj->package.elements[CAP_CODE_TYPE_IDX].type != ACPI_TYPE_BUFFER || + out_obj->package.elements[CAP_FW_VER_IDX].type != ACPI_TYPE_INTEGER || + out_obj->package.elements[CAP_CODE_RT_VER_IDX].type != ACPI_TYPE_INTEGER || + out_obj->package.elements[CAP_DRV_TYPE_IDX].type != ACPI_TYPE_BUFFER || + out_obj->package.elements[CAP_DRV_RT_VER_IDX].type != ACPI_TYPE_INTEGER || + out_obj->package.elements[CAP_DRV_SVN_IDX].type != ACPI_TYPE_INTEGER || + out_obj->package.elements[CAP_PLAT_ID_IDX].type != ACPI_TYPE_BUFFER || + out_obj->package.elements[CAP_OEM_ID_IDX].type != ACPI_TYPE_BUFFER || + out_obj->package.elements[CAP_OEM_INFO_IDX].type != ACPI_TYPE_BUFFER) + goto free_acpi_buffer; + + cap_hdr->status = out_obj->package.elements[CAP_STATUS_IDX].integer.value; + if (cap_hdr->status != DSM_SUCCEED) { + ret = -EBUSY; + dev_dbg(pfru_dev->parent_dev, "Error Status:%d\n", cap_hdr->status); + goto free_acpi_buffer; + } + + cap_hdr->update_cap = out_obj->package.elements[CAP_UPDATE_IDX].integer.value; + memcpy(&cap_hdr->code_type, + out_obj->package.elements[CAP_CODE_TYPE_IDX].buffer.pointer, + out_obj->package.elements[CAP_CODE_TYPE_IDX].buffer.length); + cap_hdr->fw_version = + out_obj->package.elements[CAP_FW_VER_IDX].integer.value; + cap_hdr->code_rt_version = + out_obj->package.elements[CAP_CODE_RT_VER_IDX].integer.value; + memcpy(&cap_hdr->drv_type, + out_obj->package.elements[CAP_DRV_TYPE_IDX].buffer.pointer, + out_obj->package.elements[CAP_DRV_TYPE_IDX].buffer.length); + cap_hdr->drv_rt_version = + out_obj->package.elements[CAP_DRV_RT_VER_IDX].integer.value; + cap_hdr->drv_svn = + out_obj->package.elements[CAP_DRV_SVN_IDX].integer.value; + memcpy(&cap_hdr->platform_id, + out_obj->package.elements[CAP_PLAT_ID_IDX].buffer.pointer, + out_obj->package.elements[CAP_PLAT_ID_IDX].buffer.length); + memcpy(&cap_hdr->oem_id, + out_obj->package.elements[CAP_OEM_ID_IDX].buffer.pointer, + out_obj->package.elements[CAP_OEM_ID_IDX].buffer.length); + cap_hdr->oem_info_len = + out_obj->package.elements[CAP_OEM_INFO_IDX].buffer.length; + + ret = 0; + +free_acpi_buffer: + kfree(out_obj); + + return ret; +} + +static int query_buffer(struct pfru_com_buf_info *info, + struct pfru_device *pfru_dev) +{ + acpi_handle handle = ACPI_HANDLE(pfru_dev->parent_dev); + union acpi_object *out_obj; + int ret = -EINVAL; + + out_obj = acpi_evaluate_dsm_typed(handle, &pfru_guid, + pfru_dev->rev_id, PFRU_FUNC_QUERY_BUF, + NULL, ACPI_TYPE_PACKAGE); + if (!out_obj) + return ret; + + if (out_obj->package.count < BUF_NR_IDX || + out_obj->package.elements[BUF_STATUS_IDX].type != ACPI_TYPE_INTEGER || + out_obj->package.elements[BUF_EXT_STATUS_IDX].type != ACPI_TYPE_INTEGER || + out_obj->package.elements[BUF_ADDR_LOW_IDX].type != ACPI_TYPE_INTEGER || + out_obj->package.elements[BUF_ADDR_HI_IDX].type != ACPI_TYPE_INTEGER || + out_obj->package.elements[BUF_SIZE_IDX].type != ACPI_TYPE_INTEGER) + goto free_acpi_buffer; + + info->status = out_obj->package.elements[BUF_STATUS_IDX].integer.value; + info->ext_status = + out_obj->package.elements[BUF_EXT_STATUS_IDX].integer.value; + if (info->status != DSM_SUCCEED) { + ret = -EBUSY; + dev_dbg(pfru_dev->parent_dev, "Error Status:%d\n", info->status); + dev_dbg(pfru_dev->parent_dev, "Error Extended Status:%d\n", info->ext_status); + + goto free_acpi_buffer; + } + + info->addr_lo = + out_obj->package.elements[BUF_ADDR_LOW_IDX].integer.value; + info->addr_hi = + out_obj->package.elements[BUF_ADDR_HI_IDX].integer.value; + info->buf_size = out_obj->package.elements[BUF_SIZE_IDX].integer.value; + + ret = 0; + +free_acpi_buffer: + kfree(out_obj); + + return ret; +} + +static int get_image_type(const struct efi_manage_capsule_image_header *img_hdr, + struct pfru_device *pfru_dev) +{ + const efi_guid_t *image_type_id = &img_hdr->image_type_id; + + /* check whether this is a code injection or driver update */ + if (guid_equal(image_type_id, &pfru_code_inj_guid)) + return PFRU_CODE_INJECT_TYPE; + + if (guid_equal(image_type_id, &pfru_drv_update_guid)) + return PFRU_DRIVER_UPDATE_TYPE; + + return -EINVAL; +} + +static int adjust_efi_size(const struct efi_manage_capsule_image_header *img_hdr, + int size) +{ + /* + * The (u64 hw_ins) was introduced in UEFI spec version 2, + * and (u64 capsule_support) was introduced in version 3. + * The size needs to be adjusted accordingly. That is to + * say, version 1 should subtract the size of hw_ins+capsule_support, + * and version 2 should sbstract the size of capsule_support. + */ + size += sizeof(struct efi_manage_capsule_image_header); + switch (img_hdr->ver) { + case 1: + return size - 2 * sizeof(u64); + + case 2: + return size - sizeof(u64); + + default: + /* only support version 1 and 2 */ + return -EINVAL; + } +} + +static bool applicable_image(const void *data, struct pfru_update_cap_info *cap, + struct pfru_device *pfru_dev) +{ + struct pfru_payload_hdr *payload_hdr; + const efi_capsule_header_t *cap_hdr = data; + const struct efi_manage_capsule_header *m_hdr; + const struct efi_manage_capsule_image_header *m_img_hdr; + const struct efi_image_auth *auth; + int type, size; + + /* + * If the code in the capsule is older than the current + * firmware code, the update will be rejected by the firmware, + * so check the version of it upfront without engaging the + * Management Mode update mechanism which may be costly. + */ + size = cap_hdr->headersize; + m_hdr = data + size; + /* + * Current data structure size plus variable array indicated + * by number of (emb_drv_cnt + payload_cnt) + */ + size += offsetof(struct efi_manage_capsule_header, offset_list) + + (m_hdr->emb_drv_cnt + m_hdr->payload_cnt) * sizeof(u64); + m_img_hdr = data + size; + + type = get_image_type(m_img_hdr, pfru_dev); + if (type < 0) + return false; + + size = adjust_efi_size(m_img_hdr, size); + if (size < 0) + return false; + + auth = data + size; + size += sizeof(u64) + auth->auth_info.hdr.len; + payload_hdr = (struct pfru_payload_hdr *)(data + size); + + /* finally compare the version */ + if (type == PFRU_CODE_INJECT_TYPE) + return payload_hdr->rt_ver >= cap->code_rt_version; + + return payload_hdr->rt_ver >= cap->drv_rt_version; +} + +static void print_update_debug_info(struct pfru_updated_result *result, + struct pfru_device *pfru_dev) +{ + dev_dbg(pfru_dev->parent_dev, "Update result:\n"); + dev_dbg(pfru_dev->parent_dev, "Authentication Time Low:%lld\n", + result->low_auth_time); + dev_dbg(pfru_dev->parent_dev, "Authentication Time High:%lld\n", + result->high_auth_time); + dev_dbg(pfru_dev->parent_dev, "Execution Time Low:%lld\n", + result->low_exec_time); + dev_dbg(pfru_dev->parent_dev, "Execution Time High:%lld\n", + result->high_exec_time); +} + +static int start_update(int action, struct pfru_device *pfru_dev) +{ + union acpi_object *out_obj, in_obj, in_buf; + struct pfru_updated_result update_result; + acpi_handle handle; + int ret = -EINVAL; + + memset(&in_obj, 0, sizeof(in_obj)); + memset(&in_buf, 0, sizeof(in_buf)); + in_obj.type = ACPI_TYPE_PACKAGE; + in_obj.package.count = 1; + in_obj.package.elements = &in_buf; + in_buf.type = ACPI_TYPE_INTEGER; + in_buf.integer.value = action; + + handle = ACPI_HANDLE(pfru_dev->parent_dev); + out_obj = acpi_evaluate_dsm_typed(handle, &pfru_guid, + pfru_dev->rev_id, PFRU_FUNC_START, + &in_obj, ACPI_TYPE_PACKAGE); + if (!out_obj) + return ret; + + if (out_obj->package.count < UPDATE_NR_IDX || + out_obj->package.elements[UPDATE_STATUS_IDX].type != ACPI_TYPE_INTEGER || + out_obj->package.elements[UPDATE_EXT_STATUS_IDX].type != ACPI_TYPE_INTEGER || + out_obj->package.elements[UPDATE_AUTH_TIME_LOW_IDX].type != ACPI_TYPE_INTEGER || + out_obj->package.elements[UPDATE_AUTH_TIME_HI_IDX].type != ACPI_TYPE_INTEGER || + out_obj->package.elements[UPDATE_EXEC_TIME_LOW_IDX].type != ACPI_TYPE_INTEGER || + out_obj->package.elements[UPDATE_EXEC_TIME_HI_IDX].type != ACPI_TYPE_INTEGER) + goto free_acpi_buffer; + + update_result.status = + out_obj->package.elements[UPDATE_STATUS_IDX].integer.value; + update_result.ext_status = + out_obj->package.elements[UPDATE_EXT_STATUS_IDX].integer.value; + + if (update_result.status != DSM_SUCCEED) { + ret = -EBUSY; + dev_dbg(pfru_dev->parent_dev, "Error Status:%d\n", update_result.status); + dev_dbg(pfru_dev->parent_dev, "Error Extended Status:%d\n", + update_result.ext_status); + + goto free_acpi_buffer; + } + + update_result.low_auth_time = + out_obj->package.elements[UPDATE_AUTH_TIME_LOW_IDX].integer.value; + update_result.high_auth_time = + out_obj->package.elements[UPDATE_AUTH_TIME_HI_IDX].integer.value; + update_result.low_exec_time = + out_obj->package.elements[UPDATE_EXEC_TIME_LOW_IDX].integer.value; + update_result.high_exec_time = + out_obj->package.elements[UPDATE_EXEC_TIME_HI_IDX].integer.value; + + print_update_debug_info(&update_result, pfru_dev); + ret = 0; + +free_acpi_buffer: + kfree(out_obj); + + return ret; +} + +static long pfru_ioctl(struct file *file, unsigned int cmd, unsigned long arg) +{ + struct pfru_update_cap_info cap_hdr; + struct pfru_device *pfru_dev = to_pfru_dev(file); + void __user *p = (void __user *)arg; + u32 rev; + int ret; + + switch (cmd) { + case PFRU_IOC_QUERY_CAP: + ret = query_capability(&cap_hdr, pfru_dev); + if (ret) + return ret; + + if (copy_to_user(p, &cap_hdr, sizeof(cap_hdr))) + return -EFAULT; + + return 0; + + case PFRU_IOC_SET_REV: + if (copy_from_user(&rev, p, sizeof(rev))) + return -EFAULT; + + if (!pfru_valid_revid(rev)) + return -EINVAL; + + pfru_dev->rev_id = rev; + + return 0; + + case PFRU_IOC_STAGE: + return start_update(START_STAGE, pfru_dev); + + case PFRU_IOC_ACTIVATE: + return start_update(START_ACTIVATE, pfru_dev); + + case PFRU_IOC_STAGE_ACTIVATE: + return start_update(START_STAGE_ACTIVATE, pfru_dev); + + default: + return -ENOTTY; + } +} + +static ssize_t pfru_write(struct file *file, const char __user *buf, + size_t len, loff_t *ppos) +{ + struct pfru_device *pfru_dev = to_pfru_dev(file); + struct pfru_update_cap_info cap; + struct pfru_com_buf_info buf_info; + phys_addr_t phy_addr; + struct iov_iter iter; + struct iovec iov; + char *buf_ptr; + int ret; + + ret = query_buffer(&buf_info, pfru_dev); + if (ret) + return ret; + + if (len > buf_info.buf_size) + return -EINVAL; + + iov.iov_base = (void __user *)buf; + iov.iov_len = len; + iov_iter_init(&iter, WRITE, &iov, 1, len); + + /* map the communication buffer */ + phy_addr = (phys_addr_t)((buf_info.addr_hi << 32) | buf_info.addr_lo); + buf_ptr = memremap(phy_addr, buf_info.buf_size, MEMREMAP_WB); + if (IS_ERR(buf_ptr)) + return PTR_ERR(buf_ptr); + + if (!copy_from_iter_full(buf_ptr, len, &iter)) { + ret = -EINVAL; + goto unmap; + } + + /* check if the capsule header has a valid version number */ + ret = query_capability(&cap, pfru_dev); + if (ret) + goto unmap; + + if (!applicable_image(buf_ptr, &cap, pfru_dev)) + ret = -EINVAL; + +unmap: + memunmap(buf_ptr); + + return ret ?: len; +} + +static const struct file_operations acpi_pfru_fops = { + .owner = THIS_MODULE, + .write = pfru_write, + .unlocked_ioctl = pfru_ioctl, + .llseek = noop_llseek, +}; + +static int acpi_pfru_remove(struct platform_device *pdev) +{ + struct pfru_device *pfru_dev = platform_get_drvdata(pdev); + + misc_deregister(&pfru_dev->miscdev); + + return 0; +} + +static void pfru_put_idx(void *data) +{ + struct pfru_device *pfru_dev = data; + + ida_free(&pfru_ida, pfru_dev->index); +} + +static int acpi_pfru_probe(struct platform_device *pdev) +{ + acpi_handle handle = ACPI_HANDLE(&pdev->dev); + struct pfru_device *pfru_dev; + int ret; + + if (!acpi_has_method(handle, "_DSM")) { + dev_dbg(&pdev->dev, "Missing _DSM\n"); + return -ENODEV; + } + + pfru_dev = devm_kzalloc(&pdev->dev, sizeof(*pfru_dev), GFP_KERNEL); + if (!pfru_dev) + return -ENOMEM; + + ret = ida_alloc(&pfru_ida, GFP_KERNEL); + if (ret < 0) + return ret; + + pfru_dev->index = ret; + ret = devm_add_action_or_reset(&pdev->dev, pfru_put_idx, pfru_dev); + if (ret) + return ret; + + pfru_dev->rev_id = PFRU_DEFAULT_REV_ID; + pfru_dev->parent_dev = &pdev->dev; + + pfru_dev->miscdev.minor = MISC_DYNAMIC_MINOR; + pfru_dev->miscdev.name = devm_kasprintf(&pdev->dev, GFP_KERNEL, + "pfru%d", pfru_dev->index); + if (!pfru_dev->miscdev.name) + return -ENOMEM; + + pfru_dev->miscdev.nodename = devm_kasprintf(&pdev->dev, GFP_KERNEL, + "acpi_pfr_update%d", pfru_dev->index); + if (!pfru_dev->miscdev.nodename) + return -ENOMEM; + + pfru_dev->miscdev.fops = &acpi_pfru_fops; + pfru_dev->miscdev.parent = &pdev->dev; + + ret = misc_register(&pfru_dev->miscdev); + if (ret) + return ret; + + platform_set_drvdata(pdev, pfru_dev); + + return 0; +} + +static const struct acpi_device_id acpi_pfru_ids[] = { + {"INTC1080"}, + {} +}; +MODULE_DEVICE_TABLE(acpi, acpi_pfru_ids); + +static struct platform_driver acpi_pfru_driver = { + .driver = { + .name = "pfr_update", + .acpi_match_table = acpi_pfru_ids, + }, + .probe = acpi_pfru_probe, + .remove = acpi_pfru_remove, +}; +module_platform_driver(acpi_pfru_driver); + +MODULE_DESCRIPTION("Platform Firmware Runtime Update device driver"); +MODULE_LICENSE("GPL v2"); diff --git a/include/uapi/linux/pfrut.h b/include/uapi/linux/pfrut.h new file mode 100644 index 000000000000..fa97e80a93b7 --- /dev/null +++ b/include/uapi/linux/pfrut.h @@ -0,0 +1,174 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * Platform Firmware Runtime Update header + * + * Copyright(c) 2021 Intel Corporation. All rights reserved. + */ +#ifndef __PFRUT_H__ +#define __PFRUT_H__ + +#include +#include + +#define PFRUT_IOCTL_MAGIC 0xEE + +/** + * PFRU_IOC_SET_REV - _IOW(PFRUT_IOCTL_MAGIC, 0x01, unsigned int) + * + * Return: + * * 0 - success + * * -EFAULT - fail to read the revision id + * * -EINVAL - user provides an invalid revision id + * + * Set the Revision ID for Platform Firmware Runtime Update. + */ +#define PFRU_IOC_SET_REV _IOW(PFRUT_IOCTL_MAGIC, 0x01, unsigned int) + +/** + * PFRU_IOC_STAGE - _IOW(PFRUT_IOCTL_MAGIC, 0x02, unsigned int) + * + * Return: + * * 0 - success + * * -EINVAL - stage phase returns invalid result + * + * Stage a capsule image from communication buffer and perform authentication. + */ +#define PFRU_IOC_STAGE _IOW(PFRUT_IOCTL_MAGIC, 0x02, unsigned int) + +/** + * PFRU_IOC_ACTIVATE - _IOW(PFRUT_IOCTL_MAGIC, 0x03, unsigned int) + * + * Return: + * * 0 - success + * * -EINVAL - activate phase returns invalid result + * + * Activate a previously staged capsule image. + */ +#define PFRU_IOC_ACTIVATE _IOW(PFRUT_IOCTL_MAGIC, 0x03, unsigned int) + +/** + * PFRU_IOC_STAGE_ACTIVATE - _IOW(PFRUT_IOCTL_MAGIC, 0x04, unsigned int) + * + * Return: + * * 0 - success + * * -EINVAL - stage/activate phase returns invalid result. + * + * Perform both stage and activation action. + */ +#define PFRU_IOC_STAGE_ACTIVATE _IOW(PFRUT_IOCTL_MAGIC, 0x04, unsigned int) + +/** + * PFRU_IOC_QUERY_CAP - _IOR(PFRUT_IOCTL_MAGIC, 0x05, + * struct pfru_update_cap_info) + * + * Return: + * * 0 - success + * * -EINVAL - query phase returns invalid result + * * -EFAULT - the result fails to be copied to userspace + * + * Retrieve information on the Platform Firmware Runtime Update capability. + * The information is a struct pfru_update_cap_info. + */ +#define PFRU_IOC_QUERY_CAP _IOR(PFRUT_IOCTL_MAGIC, 0x05, struct pfru_update_cap_info) + +/** + * struct pfru_payload_hdr - Capsule file payload header. + * + * @sig: Signature of this capsule file. + * @hdr_version: Revision of this header structure. + * @hdr_size: Size of this header, including the OemHeader bytes. + * @hw_ver: The supported firmware version. + * @rt_ver: Version of the code injection image. + * @platform_id: A platform specific GUID to specify the platform what + * this capsule image support. + */ +struct pfru_payload_hdr { + __u32 sig; + __u32 hdr_version; + __u32 hdr_size; + __u32 hw_ver; + __u32 rt_ver; + __u8 platform_id[16]; +}; + +enum pfru_dsm_status { + DSM_SUCCEED = 0, + DSM_FUNC_NOT_SUPPORT = 1, + DSM_INVAL_INPUT = 2, + DSM_HARDWARE_ERR = 3, + DSM_RETRY_SUGGESTED = 4, + DSM_UNKNOWN = 5, + DSM_FUNC_SPEC_ERR = 6, +}; + +/** + * struct pfru_update_cap_info - Runtime update capability information. + * + * @status: Indicator of whether this query succeed. + * @update_cap: Bitmap to indicate whether the feature is supported. + * @code_type: A buffer containing an image type GUID. + * @fw_version: Platform firmware version. + * @code_rt_version: Code injection runtime version for anti-rollback. + * @drv_type: A buffer containing an image type GUID. + * @drv_rt_version: The version of the driver update runtime code. + * @drv_svn: The secure version number(SVN) of the driver update runtime code. + * @platform_id: A buffer containing a platform ID GUID. + * @oem_id: A buffer containing an OEM ID GUID. + * @oem_info_len: Length of the buffer containing the vendor specific information. + */ +struct pfru_update_cap_info { + __u32 status; + __u32 update_cap; + + __u8 code_type[16]; + __u32 fw_version; + __u32 code_rt_version; + + __u8 drv_type[16]; + __u32 drv_rt_version; + __u32 drv_svn; + + __u8 platform_id[16]; + __u8 oem_id[16]; + + __u32 oem_info_len; +}; + +/** + * struct pfru_com_buf_info - Communication buffer information. + * + * @status: Indicator of whether this query succeed. + * @ext_status: Implementation specific query result. + * @addr_lo: Low 32bit physical address of the communication buffer to hold + * a runtime update package. + * @addr_hi: High 32bit physical address of the communication buffer to hold + * a runtime update package. + * @buf_size: Maximum size in bytes of the communication buffer. + */ +struct pfru_com_buf_info { + __u32 status; + __u32 ext_status; + __u64 addr_lo; + __u64 addr_hi; + __u32 buf_size; +}; + +/** + * struct pfru_updated_result - Platform firmware runtime update result information. + * @status: Indicator of whether this update succeed. + * @ext_status: Implementation specific update result. + * @low_auth_time: Low 32bit value of image authentication time in nanosecond. + * @high_auth_time: High 32bit value of image authentication time in nanosecond. + * @low_exec_time: Low 32bit value of image execution time in nanosecond. + * @high_exec_time: High 32bit value of image execution time in nanosecond. + */ +struct pfru_updated_result { + __u32 status; + __u32 ext_status; + __u64 low_auth_time; + __u64 high_auth_time; + __u64 low_exec_time; + __u64 high_exec_time; +}; + +#endif /* __PFRUT_H__ */ From b0013e037a8b07772c74ce24f1ae4743b30fc3cf Mon Sep 17 00:00:00 2001 From: Chen Yu Date: Wed, 22 Dec 2021 12:32:02 +0800 Subject: [PATCH 122/493] ACPI: Introduce Platform Firmware Runtime Telemetry driver This driver allows user space to fetch telemetry data from the firmware with the help of the Platform Firmware Runtime Telemetry interface. Both PFRU and PFRT are based on ACPI _DSM interfaces located under special device objects in the ACPI Namespace, but these interfaces are different from each other, so it is better to provide a separate driver from each of them, even though they share some common definitions and naming conventions. Tested-by: Hongyu Ning Signed-off-by: Chen Yu [ rjw: Subject and changelog edits ] Signed-off-by: Rafael J. Wysocki --- drivers/acpi/Kconfig | 8 +- drivers/acpi/Makefile | 2 +- drivers/acpi/pfr_telemetry.c | 434 +++++++++++++++++++++++++++++++++++ include/uapi/linux/pfrut.h | 88 +++++++ 4 files changed, 529 insertions(+), 3 deletions(-) create mode 100644 drivers/acpi/pfr_telemetry.c diff --git a/drivers/acpi/Kconfig b/drivers/acpi/Kconfig index d0b3ca9d4a97..91f1da16934d 100644 --- a/drivers/acpi/Kconfig +++ b/drivers/acpi/Kconfig @@ -532,8 +532,12 @@ config ACPI_PFRUT The existing firmware code can be modified (driver update) or extended by adding new code to the firmware (code injection). - To compile this driver as module, choose M here: - the module will be called pfr_update. + Besides, the telemetry driver allows user space to fetch telemetry + data from the firmware with the help of the Platform Firmware Runtime + Telemetry interface. + + To compile the drivers as modules, choose M here: + the modules will be called pfr_update and pfr_telemetry. if ARM64 source "drivers/acpi/arm64/Kconfig" diff --git a/drivers/acpi/Makefile b/drivers/acpi/Makefile index 2ad2e821cc08..d3dc79298ce3 100644 --- a/drivers/acpi/Makefile +++ b/drivers/acpi/Makefile @@ -102,7 +102,7 @@ obj-$(CONFIG_ACPI_CPPC_LIB) += cppc_acpi.o obj-$(CONFIG_ACPI_SPCR_TABLE) += spcr.o obj-$(CONFIG_ACPI_DEBUGGER_USER) += acpi_dbg.o obj-$(CONFIG_ACPI_PPTT) += pptt.o -obj-$(CONFIG_ACPI_PFRUT) += pfr_update.o +obj-$(CONFIG_ACPI_PFRUT) += pfr_update.o pfr_telemetry.o # processor has its own "processor." module_param namespace processor-y := processor_driver.o diff --git a/drivers/acpi/pfr_telemetry.c b/drivers/acpi/pfr_telemetry.c new file mode 100644 index 000000000000..da50dd80192c --- /dev/null +++ b/drivers/acpi/pfr_telemetry.c @@ -0,0 +1,434 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * ACPI Platform Firmware Runtime Telemetry driver + * + * Copyright (C) 2021 Intel Corporation + * Author: Chen Yu + * + * This driver allows user space to fetch telemetry data from the + * firmware with the help of the Platform Firmware Runtime Telemetry + * interface. + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#define PFRT_LOG_EXEC_IDX 0 +#define PFRT_LOG_HISTORY_IDX 1 + +#define PFRT_LOG_ERR 0 +#define PFRT_LOG_WARN 1 +#define PFRT_LOG_INFO 2 +#define PFRT_LOG_VERB 4 + +#define PFRT_FUNC_SET_LEV 1 +#define PFRT_FUNC_GET_LEV 2 +#define PFRT_FUNC_GET_DATA 3 + +#define PFRT_REVID_1 1 +#define PFRT_REVID_2 2 +#define PFRT_DEFAULT_REV_ID PFRT_REVID_1 + +enum log_index { + LOG_STATUS_IDX = 0, + LOG_EXT_STATUS_IDX = 1, + LOG_MAX_SZ_IDX = 2, + LOG_CHUNK1_LO_IDX = 3, + LOG_CHUNK1_HI_IDX = 4, + LOG_CHUNK1_SZ_IDX = 5, + LOG_CHUNK2_LO_IDX = 6, + LOG_CHUNK2_HI_IDX = 7, + LOG_CHUNK2_SZ_IDX = 8, + LOG_ROLLOVER_CNT_IDX = 9, + LOG_RESET_CNT_IDX = 10, + LOG_NR_IDX +}; + +struct pfrt_log_device { + int index; + struct pfrt_log_info info; + struct device *parent_dev; + struct miscdevice miscdev; +}; + +/* pfrt_guid is the parameter for _DSM method */ +static const guid_t pfrt_log_guid = + GUID_INIT(0x75191659, 0x8178, 0x4D9D, 0xB8, 0x8F, 0xAC, 0x5E, + 0x5E, 0x93, 0xE8, 0xBF); + +static DEFINE_IDA(pfrt_log_ida); + +static inline struct pfrt_log_device *to_pfrt_log_dev(struct file *file) +{ + return container_of(file->private_data, struct pfrt_log_device, miscdev); +} + +static int get_pfrt_log_data_info(struct pfrt_log_data_info *data_info, + struct pfrt_log_device *pfrt_log_dev) +{ + acpi_handle handle = ACPI_HANDLE(pfrt_log_dev->parent_dev); + union acpi_object *out_obj, in_obj, in_buf; + int ret = -EBUSY; + + memset(&in_obj, 0, sizeof(in_obj)); + memset(&in_buf, 0, sizeof(in_buf)); + in_obj.type = ACPI_TYPE_PACKAGE; + in_obj.package.count = 1; + in_obj.package.elements = &in_buf; + in_buf.type = ACPI_TYPE_INTEGER; + in_buf.integer.value = pfrt_log_dev->info.log_type; + + out_obj = acpi_evaluate_dsm_typed(handle, &pfrt_log_guid, + pfrt_log_dev->info.log_revid, PFRT_FUNC_GET_DATA, + &in_obj, ACPI_TYPE_PACKAGE); + if (!out_obj) + return -EINVAL; + + if (out_obj->package.count < LOG_NR_IDX || + out_obj->package.elements[LOG_STATUS_IDX].type != ACPI_TYPE_INTEGER || + out_obj->package.elements[LOG_EXT_STATUS_IDX].type != ACPI_TYPE_INTEGER || + out_obj->package.elements[LOG_MAX_SZ_IDX].type != ACPI_TYPE_INTEGER || + out_obj->package.elements[LOG_CHUNK1_LO_IDX].type != ACPI_TYPE_INTEGER || + out_obj->package.elements[LOG_CHUNK1_HI_IDX].type != ACPI_TYPE_INTEGER || + out_obj->package.elements[LOG_CHUNK1_SZ_IDX].type != ACPI_TYPE_INTEGER || + out_obj->package.elements[LOG_CHUNK2_LO_IDX].type != ACPI_TYPE_INTEGER || + out_obj->package.elements[LOG_CHUNK2_HI_IDX].type != ACPI_TYPE_INTEGER || + out_obj->package.elements[LOG_CHUNK2_SZ_IDX].type != ACPI_TYPE_INTEGER || + out_obj->package.elements[LOG_ROLLOVER_CNT_IDX].type != ACPI_TYPE_INTEGER || + out_obj->package.elements[LOG_RESET_CNT_IDX].type != ACPI_TYPE_INTEGER) + goto free_acpi_buffer; + + data_info->status = out_obj->package.elements[LOG_STATUS_IDX].integer.value; + data_info->ext_status = + out_obj->package.elements[LOG_EXT_STATUS_IDX].integer.value; + if (data_info->status != DSM_SUCCEED) { + dev_dbg(pfrt_log_dev->parent_dev, "Error Status:%d\n", data_info->status); + dev_dbg(pfrt_log_dev->parent_dev, "Error Extend Status:%d\n", + data_info->ext_status); + goto free_acpi_buffer; + } + + data_info->max_data_size = + out_obj->package.elements[LOG_MAX_SZ_IDX].integer.value; + data_info->chunk1_addr_lo = + out_obj->package.elements[LOG_CHUNK1_LO_IDX].integer.value; + data_info->chunk1_addr_hi = + out_obj->package.elements[LOG_CHUNK1_HI_IDX].integer.value; + data_info->chunk1_size = + out_obj->package.elements[LOG_CHUNK1_SZ_IDX].integer.value; + data_info->chunk2_addr_lo = + out_obj->package.elements[LOG_CHUNK2_LO_IDX].integer.value; + data_info->chunk2_addr_hi = + out_obj->package.elements[LOG_CHUNK2_HI_IDX].integer.value; + data_info->chunk2_size = + out_obj->package.elements[LOG_CHUNK2_SZ_IDX].integer.value; + data_info->rollover_cnt = + out_obj->package.elements[LOG_ROLLOVER_CNT_IDX].integer.value; + data_info->reset_cnt = + out_obj->package.elements[LOG_RESET_CNT_IDX].integer.value; + + ret = 0; + +free_acpi_buffer: + kfree(out_obj); + + return ret; +} + +static int set_pfrt_log_level(int level, struct pfrt_log_device *pfrt_log_dev) +{ + acpi_handle handle = ACPI_HANDLE(pfrt_log_dev->parent_dev); + union acpi_object *out_obj, *obj, in_obj, in_buf; + enum pfru_dsm_status status, ext_status; + int ret = 0; + + memset(&in_obj, 0, sizeof(in_obj)); + memset(&in_buf, 0, sizeof(in_buf)); + in_obj.type = ACPI_TYPE_PACKAGE; + in_obj.package.count = 1; + in_obj.package.elements = &in_buf; + in_buf.type = ACPI_TYPE_INTEGER; + in_buf.integer.value = level; + + out_obj = acpi_evaluate_dsm_typed(handle, &pfrt_log_guid, + pfrt_log_dev->info.log_revid, PFRT_FUNC_SET_LEV, + &in_obj, ACPI_TYPE_PACKAGE); + if (!out_obj) + return -EINVAL; + + obj = &out_obj->package.elements[0]; + status = obj->integer.value; + if (status != DSM_SUCCEED) { + obj = &out_obj->package.elements[1]; + ext_status = obj->integer.value; + dev_dbg(pfrt_log_dev->parent_dev, "Error Status:%d\n", status); + dev_dbg(pfrt_log_dev->parent_dev, "Error Extend Status:%d\n", ext_status); + ret = -EBUSY; + } + + kfree(out_obj); + + return ret; +} + +static int get_pfrt_log_level(struct pfrt_log_device *pfrt_log_dev) +{ + acpi_handle handle = ACPI_HANDLE(pfrt_log_dev->parent_dev); + union acpi_object *out_obj, *obj; + enum pfru_dsm_status status, ext_status; + int ret = -EBUSY; + + out_obj = acpi_evaluate_dsm_typed(handle, &pfrt_log_guid, + pfrt_log_dev->info.log_revid, PFRT_FUNC_GET_LEV, + NULL, ACPI_TYPE_PACKAGE); + if (!out_obj) + return -EINVAL; + + obj = &out_obj->package.elements[0]; + if (obj->type != ACPI_TYPE_INTEGER) + goto free_acpi_buffer; + + status = obj->integer.value; + if (status != DSM_SUCCEED) { + obj = &out_obj->package.elements[1]; + ext_status = obj->integer.value; + dev_dbg(pfrt_log_dev->parent_dev, "Error Status:%d\n", status); + dev_dbg(pfrt_log_dev->parent_dev, "Error Extend Status:%d\n", ext_status); + goto free_acpi_buffer; + } + + obj = &out_obj->package.elements[2]; + if (obj->type != ACPI_TYPE_INTEGER) + goto free_acpi_buffer; + + ret = obj->integer.value; + +free_acpi_buffer: + kfree(out_obj); + + return ret; +} + +static int valid_log_level(u32 level) +{ + return level == PFRT_LOG_ERR || level == PFRT_LOG_WARN || + level == PFRT_LOG_INFO || level == PFRT_LOG_VERB; +} + +static int valid_log_type(u32 type) +{ + return type == PFRT_LOG_EXEC_IDX || type == PFRT_LOG_HISTORY_IDX; +} + +static inline int valid_log_revid(u32 id) +{ + return id == PFRT_REVID_1 || id == PFRT_REVID_2; +} + +static long pfrt_log_ioctl(struct file *file, unsigned int cmd, unsigned long arg) +{ + struct pfrt_log_device *pfrt_log_dev = to_pfrt_log_dev(file); + struct pfrt_log_data_info data_info; + struct pfrt_log_info info; + void __user *p; + int ret = 0; + + p = (void __user *)arg; + + switch (cmd) { + case PFRT_LOG_IOC_SET_INFO: + if (copy_from_user(&info, p, sizeof(info))) + return -EFAULT; + + if (valid_log_revid(info.log_revid)) + pfrt_log_dev->info.log_revid = info.log_revid; + + if (valid_log_level(info.log_level)) { + ret = set_pfrt_log_level(info.log_level, pfrt_log_dev); + if (ret < 0) + return ret; + + pfrt_log_dev->info.log_level = info.log_level; + } + + if (valid_log_type(info.log_type)) + pfrt_log_dev->info.log_type = info.log_type; + + return 0; + + case PFRT_LOG_IOC_GET_INFO: + info.log_level = get_pfrt_log_level(pfrt_log_dev); + if (ret < 0) + return ret; + + info.log_type = pfrt_log_dev->info.log_type; + info.log_revid = pfrt_log_dev->info.log_revid; + if (copy_to_user(p, &info, sizeof(info))) + return -EFAULT; + + return 0; + + case PFRT_LOG_IOC_GET_DATA_INFO: + ret = get_pfrt_log_data_info(&data_info, pfrt_log_dev); + if (ret) + return ret; + + if (copy_to_user(p, &data_info, sizeof(struct pfrt_log_data_info))) + return -EFAULT; + + return 0; + + default: + return -ENOTTY; + } +} + +static int +pfrt_log_mmap(struct file *file, struct vm_area_struct *vma) +{ + struct pfrt_log_device *pfrt_log_dev; + struct pfrt_log_data_info info; + unsigned long psize, vsize; + phys_addr_t base_addr; + int ret; + + if (vma->vm_flags & VM_WRITE) + return -EROFS; + + /* changing from read to write with mprotect is not allowed */ + vma->vm_flags &= ~VM_MAYWRITE; + + pfrt_log_dev = to_pfrt_log_dev(file); + + ret = get_pfrt_log_data_info(&info, pfrt_log_dev); + if (ret) + return ret; + + base_addr = (phys_addr_t)((info.chunk2_addr_hi << 32) | info.chunk2_addr_lo); + /* pfrt update has not been launched yet */ + if (!base_addr) + return -ENODEV; + + psize = info.max_data_size; + /* base address and total buffer size must be page aligned */ + if (!PAGE_ALIGNED(base_addr) || !PAGE_ALIGNED(psize)) + return -ENODEV; + + vsize = vma->vm_end - vma->vm_start; + if (vsize > psize) + return -EINVAL; + + vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); + if (io_remap_pfn_range(vma, vma->vm_start, PFN_DOWN(base_addr), + vsize, vma->vm_page_prot)) + return -EAGAIN; + + return 0; +} + +static const struct file_operations acpi_pfrt_log_fops = { + .owner = THIS_MODULE, + .mmap = pfrt_log_mmap, + .unlocked_ioctl = pfrt_log_ioctl, + .llseek = noop_llseek, +}; + +static int acpi_pfrt_log_remove(struct platform_device *pdev) +{ + struct pfrt_log_device *pfrt_log_dev = platform_get_drvdata(pdev); + + misc_deregister(&pfrt_log_dev->miscdev); + + return 0; +} + +static void pfrt_log_put_idx(void *data) +{ + struct pfrt_log_device *pfrt_log_dev = data; + + ida_free(&pfrt_log_ida, pfrt_log_dev->index); +} + +static int acpi_pfrt_log_probe(struct platform_device *pdev) +{ + acpi_handle handle = ACPI_HANDLE(&pdev->dev); + struct pfrt_log_device *pfrt_log_dev; + int ret; + + if (!acpi_has_method(handle, "_DSM")) { + dev_dbg(&pdev->dev, "Missing _DSM\n"); + return -ENODEV; + } + + pfrt_log_dev = devm_kzalloc(&pdev->dev, sizeof(*pfrt_log_dev), GFP_KERNEL); + if (!pfrt_log_dev) + return -ENOMEM; + + ret = ida_alloc(&pfrt_log_ida, GFP_KERNEL); + if (ret < 0) + return ret; + + pfrt_log_dev->index = ret; + ret = devm_add_action_or_reset(&pdev->dev, pfrt_log_put_idx, pfrt_log_dev); + if (ret) + return ret; + + pfrt_log_dev->info.log_revid = PFRT_DEFAULT_REV_ID; + pfrt_log_dev->parent_dev = &pdev->dev; + + pfrt_log_dev->miscdev.minor = MISC_DYNAMIC_MINOR; + pfrt_log_dev->miscdev.name = devm_kasprintf(&pdev->dev, GFP_KERNEL, + "pfrt%d", + pfrt_log_dev->index); + if (!pfrt_log_dev->miscdev.name) + return -ENOMEM; + + pfrt_log_dev->miscdev.nodename = devm_kasprintf(&pdev->dev, GFP_KERNEL, + "acpi_pfr_telemetry%d", + pfrt_log_dev->index); + if (!pfrt_log_dev->miscdev.nodename) + return -ENOMEM; + + pfrt_log_dev->miscdev.fops = &acpi_pfrt_log_fops; + pfrt_log_dev->miscdev.parent = &pdev->dev; + + ret = misc_register(&pfrt_log_dev->miscdev); + if (ret) + return ret; + + platform_set_drvdata(pdev, pfrt_log_dev); + + return 0; +} + +static const struct acpi_device_id acpi_pfrt_log_ids[] = { + {"INTC1081"}, + {} +}; +MODULE_DEVICE_TABLE(acpi, acpi_pfrt_log_ids); + +static struct platform_driver acpi_pfrt_log_driver = { + .driver = { + .name = "pfr_telemetry", + .acpi_match_table = acpi_pfrt_log_ids, + }, + .probe = acpi_pfrt_log_probe, + .remove = acpi_pfrt_log_remove, +}; +module_platform_driver(acpi_pfrt_log_driver); + +MODULE_DESCRIPTION("Platform Firmware Runtime Update Telemetry driver"); +MODULE_LICENSE("GPL v2"); diff --git a/include/uapi/linux/pfrut.h b/include/uapi/linux/pfrut.h index fa97e80a93b7..42fa15f8310d 100644 --- a/include/uapi/linux/pfrut.h +++ b/include/uapi/linux/pfrut.h @@ -171,4 +171,92 @@ struct pfru_updated_result { __u64 high_exec_time; }; +/** + * struct pfrt_log_data_info - Log Data from telemetry service. + * @status: Indicator of whether this update succeed. + * @ext_status: Implementation specific update result. + * @chunk1_addr_lo: Low 32bit physical address of the telemetry data chunk1 + * starting address. + * @chunk1_addr_hi: High 32bit physical address of the telemetry data chunk1 + * starting address. + * @chunk2_addr_lo: Low 32bit physical address of the telemetry data chunk2 + * starting address. + * @chunk2_addr_hi: High 32bit physical address of the telemetry data chunk2 + * starting address. + * @max_data_size: Maximum supported size of data of all data chunks combined. + * @chunk1_size: Data size in bytes of the telemetry data chunk1 buffer. + * @chunk2_size: Data size in bytes of the telemetry data chunk2 buffer. + * @rollover_cnt: Number of times telemetry data buffer is overwritten + * since telemetry buffer reset. + * @reset_cnt: Number of times telemetry services resets that results in + * rollover count and data chunk buffers are reset. + */ +struct pfrt_log_data_info { + __u32 status; + __u32 ext_status; + __u64 chunk1_addr_lo; + __u64 chunk1_addr_hi; + __u64 chunk2_addr_lo; + __u64 chunk2_addr_hi; + __u32 max_data_size; + __u32 chunk1_size; + __u32 chunk2_size; + __u32 rollover_cnt; + __u32 reset_cnt; +}; + +/** + * struct pfrt_log_info - Telemetry log information. + * @log_level: The telemetry log level. + * @log_type: The telemetry log type(history and execution). + * @log_revid: The telemetry log revision id. + */ +struct pfrt_log_info { + __u32 log_level; + __u32 log_type; + __u32 log_revid; +}; + +/** + * PFRT_LOG_IOC_SET_INFO - _IOW(PFRUT_IOCTL_MAGIC, 0x06, + * struct pfrt_log_info) + * + * Return: + * * 0 - success + * * -EFAULT - fail to get the setting parameter + * * -EINVAL - fail to set the log level + * + * Set the PFRT log level and log type. The input information is + * a struct pfrt_log_info. + */ +#define PFRT_LOG_IOC_SET_INFO _IOW(PFRUT_IOCTL_MAGIC, 0x06, struct pfrt_log_info) + +/** + * PFRT_LOG_IOC_GET_INFO - _IOR(PFRUT_IOCTL_MAGIC, 0x07, + * struct pfrt_log_info) + * + * Return: + * * 0 - success + * * -EINVAL - fail to get the log level + * * -EFAULT - fail to copy the result back to userspace + * + * Retrieve log level and log type of the telemetry. The information is + * a struct pfrt_log_info. + */ +#define PFRT_LOG_IOC_GET_INFO _IOR(PFRUT_IOCTL_MAGIC, 0x07, struct pfrt_log_info) + +/** + * PFRT_LOG_IOC_GET_DATA_INFO - _IOR(PFRUT_IOCTL_MAGIC, 0x08, + * struct pfrt_log_data_info) + * + * Return: + * * 0 - success + * * -EINVAL - fail to get the log buffer information + * * -EFAULT - fail to copy the log buffer information to userspace + * + * Retrieve data information about the telemetry. The information + * is a struct pfrt_log_data_info. + */ +#define PFRT_LOG_IOC_GET_DATA_INFO _IOR(PFRUT_IOCTL_MAGIC, 0x08, struct pfrt_log_data_info) + #endif /* __PFRUT_H__ */ From 53e8558837be58c1d44d50ad87247a8c56c95c13 Mon Sep 17 00:00:00 2001 From: Chen Yu Date: Wed, 22 Dec 2021 12:32:34 +0800 Subject: [PATCH 123/493] ACPI: tools: Introduce utility for firmware updates/telemetry Introduce a user space tool to make use of the interface exposed by Platform Firmware Runtime Update and Telemetry drivers. It can be used for firmware code injection, driver updates and to retrieve platform firmware telemetry data. Tested-by: Hongyu Ning Signed-off-by: Chen Yu [ rjw: Subject and changelog edits ] Signed-off-by: Rafael J. Wysocki --- tools/power/acpi/.gitignore | 1 + tools/power/acpi/Makefile | 16 +- tools/power/acpi/Makefile.rules | 2 +- tools/power/acpi/man/pfrut.8 | 137 +++++++++ tools/power/acpi/tools/pfrut/Makefile | 23 ++ tools/power/acpi/tools/pfrut/pfrut.c | 424 ++++++++++++++++++++++++++ 6 files changed, 594 insertions(+), 9 deletions(-) create mode 100644 tools/power/acpi/man/pfrut.8 create mode 100644 tools/power/acpi/tools/pfrut/Makefile create mode 100644 tools/power/acpi/tools/pfrut/pfrut.c diff --git a/tools/power/acpi/.gitignore b/tools/power/acpi/.gitignore index 0b319fc8bb17..eada0297ef88 100644 --- a/tools/power/acpi/.gitignore +++ b/tools/power/acpi/.gitignore @@ -2,4 +2,5 @@ /acpidbg /acpidump /ec +/pfrut /include/ diff --git a/tools/power/acpi/Makefile b/tools/power/acpi/Makefile index a249c50ebf55..5ff1d9c864d0 100644 --- a/tools/power/acpi/Makefile +++ b/tools/power/acpi/Makefile @@ -9,18 +9,18 @@ include ../../scripts/Makefile.include .NOTPARALLEL: -all: acpidbg acpidump ec -clean: acpidbg_clean acpidump_clean ec_clean -install: acpidbg_install acpidump_install ec_install -uninstall: acpidbg_uninstall acpidump_uninstall ec_uninstall +all: acpidbg acpidump ec pfrut +clean: acpidbg_clean acpidump_clean ec_clean pfrut_clean +install: acpidbg_install acpidump_install ec_install pfrut_install +uninstall: acpidbg_uninstall acpidump_uninstall ec_uninstall pfrut_uninstall -acpidbg acpidump ec: FORCE +acpidbg acpidump ec pfrut: FORCE $(call descend,tools/$@,all) -acpidbg_clean acpidump_clean ec_clean: +acpidbg_clean acpidump_clean ec_clean pfrut_clean: $(call descend,tools/$(@:_clean=),clean) -acpidbg_install acpidump_install ec_install: +acpidbg_install acpidump_install ec_install pfrut_install: $(call descend,tools/$(@:_install=),install) -acpidbg_uninstall acpidump_uninstall ec_uninstall: +acpidbg_uninstall acpidump_uninstall ec_uninstall pfrut_uninstall: $(call descend,tools/$(@:_uninstall=),uninstall) .PHONY: FORCE diff --git a/tools/power/acpi/Makefile.rules b/tools/power/acpi/Makefile.rules index 1d7616f5d0ae..b71aada77688 100644 --- a/tools/power/acpi/Makefile.rules +++ b/tools/power/acpi/Makefile.rules @@ -9,7 +9,7 @@ objdir := $(OUTPUT)tools/$(TOOL)/ toolobjs := $(addprefix $(objdir),$(TOOL_OBJS)) $(OUTPUT)$(TOOL): $(toolobjs) FORCE $(ECHO) " LD " $(subst $(OUTPUT),,$@) - $(QUIET) $(LD) $(CFLAGS) $(LDFLAGS) $(toolobjs) -L$(OUTPUT) -o $@ + $(QUIET) $(LD) $(CFLAGS) $(toolobjs) $(LDFLAGS) -L$(OUTPUT) -o $@ $(ECHO) " STRIP " $(subst $(OUTPUT),,$@) $(QUIET) $(STRIPCMD) $@ diff --git a/tools/power/acpi/man/pfrut.8 b/tools/power/acpi/man/pfrut.8 new file mode 100644 index 000000000000..3db574770e8d --- /dev/null +++ b/tools/power/acpi/man/pfrut.8 @@ -0,0 +1,137 @@ +.TH "PFRUT" "8" "October 2021" "pfrut 1.0" "" +.hy +.SH Name +.PP +pfrut \- Platform Firmware Runtime Update and Telemetry tool +.SH SYNOPSIS +.PP +\f[B]pfrut\f[R] [\f[I]Options\f[R]] +.SH DESCRIPTION +.PP +The PFRUT(Platform Firmware Runtime Update and Telemetry) kernel interface is designed +to +.PD 0 +.P +.PD +interact with the platform firmware interface defined in the +.PD 0 +.P +.PD +Management Mode Firmware Runtime +Update (https://uefi.org/sites/default/files/resources/Intel_MM_OS_Interface_Spec_Rev100.pdf) +.PD 0 +.P +.PD +\f[B]pfrut\f[R] is the tool to interact with the kernel interface. +.PD 0 +.P +.PD +.SH OPTIONS +.TP +.B \f[B]\-h\f[R], \f[B]\-\-help\f[R] +Display helper information. +.TP +.B \f[B]\-l\f[R], \f[B]\-\-load\f[R] +Load the capsule file into the system. +To be more specific, the capsule file will be copied to the +communication buffer. +.TP +.B \f[B]\-s\f[R], \f[B]\-\-stage\f[R] +Stage the capsule image from communication buffer into Management Mode +and perform authentication. +.TP +.B \f[B]\-a\f[R], \f[B]\-\-activate\f[R] +Activate a previous staged capsule image. +.TP +.B \f[B]\-u\f[R], \f[B]\-\-update\f[R] +Perform both stage and activation actions. +.TP +.B \f[B]\-q\f[R], \f[B]\-\-query\f[R] +Query the update capability. +.TP +.B \f[B]\-d\f[R], \f[B]\-\-setrev\f[R] +Set the revision ID of code injection/driver update. +.TP +.B \f[B]\-D\f[R], \f[B]\-\-setrevlog\f[R] +Set the revision ID of telemetry. +.TP +.B \f[B]\-G\f[R], \f[B]\-\-getloginfo\f[R] +Get telemetry log information and print it out. +.TP +.B \f[B]\-T\f[R], \f[B]\-\-type\f[R] +Set the telemetry log data type. +.TP +.B \f[B]\-L\f[R], \f[B]\-\-level\f[R] +Set the telemetry log level. +.TP +.B \f[B]\-R\f[R], \f[B]\-\-read\f[R] +Read all the telemetry data and print it out. +.SH EXAMPLES +.PP +\f[B]pfrut \-G\f[R] +.PP +log_level:4 +.PD 0 +.P +.PD +log_type:0 +.PD 0 +.P +.PD +log_revid:2 +.PD 0 +.P +.PD +max_data_size:65536 +.PD 0 +.P +.PD +chunk1_size:0 +.PD 0 +.P +.PD +chunk2_size:1401 +.PD 0 +.P +.PD +rollover_cnt:0 +.PD 0 +.P +.PD +reset_cnt:4 +.PP +\f[B]pfru \-q\f[R] +.PP +code injection image type:794bf8b2\-6e7b\-454e\-885f\-3fb9bb185402 +.PD 0 +.P +.PD +fw_version:0 +.PD 0 +.P +.PD +code_rt_version:1 +.PD 0 +.P +.PD +driver update image type:0e5f0b14\-f849\-7945\-ad81\-bc7b6d2bb245 +.PD 0 +.P +.PD +drv_rt_version:0 +.PD 0 +.P +.PD +drv_svn:0 +.PD 0 +.P +.PD +platform id:39214663\-b1a8\-4eaa\-9024\-f2bb53ea4723 +.PD 0 +.P +.PD +oem id:a36db54f\-ea2a\-e14e\-b7c4\-b5780e51ba3d +.PP +\f[B]pfrut \-l yours.cap \-u \-T 1 \-L 4\f[R] +.SH AUTHORS +Chen Yu. diff --git a/tools/power/acpi/tools/pfrut/Makefile b/tools/power/acpi/tools/pfrut/Makefile new file mode 100644 index 000000000000..61c1a96fd433 --- /dev/null +++ b/tools/power/acpi/tools/pfrut/Makefile @@ -0,0 +1,23 @@ +# SPDX-License-Identifier: GPL-2.0+ + +include ../../Makefile.config + +TOOL = pfrut +EXTRA_INSTALL = install-man +EXTRA_UNINSTALL = uninstall-man + +CFLAGS += -Wall -O2 +CFLAGS += -DPFRUT_HEADER='"../../../../../include/uapi/linux/pfrut.h"' +LDFLAGS += -luuid + +TOOL_OBJS = \ + pfrut.o + +include ../../Makefile.rules + +install-man: $(srctree)/man/pfrut.8 + $(ECHO) " INST " pfrut.8 + $(QUIET) $(INSTALL_DATA) -D $< $(DESTDIR)$(mandir)/man8/pfrut.8 +uninstall-man: + $(ECHO) " UNINST " pfrut.8 + $(QUIET) rm -f $(DESTDIR)$(mandir)/man8/pfrut.8 diff --git a/tools/power/acpi/tools/pfrut/pfrut.c b/tools/power/acpi/tools/pfrut/pfrut.c new file mode 100644 index 000000000000..d79c335594b2 --- /dev/null +++ b/tools/power/acpi/tools/pfrut/pfrut.c @@ -0,0 +1,424 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Platform Firmware Runtime Update tool to do Management + * Mode code injection/driver update and telemetry retrieval. + * + * This tool uses the interfaces provided by pfr_update and + * pfr_telemetry drivers. These interfaces are exposed via + * /dev/pfr_update and /dev/pfr_telemetry. Write operation + * on the /dev/pfr_update is to load the EFI capsule into + * kernel space. Mmap/read operations on /dev/pfr_telemetry + * could be used to read the telemetry data to user space. + */ +#define _GNU_SOURCE +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include PFRUT_HEADER + +char *capsule_name; +int action, query_cap, log_type, log_level, log_read, log_getinfo, + revid, log_revid; +int set_log_level, set_log_type, + set_revid, set_log_revid; + +char *progname; + +#define LOG_ERR 0 +#define LOG_WARN 1 +#define LOG_INFO 2 +#define LOG_VERB 4 +#define LOG_EXEC_IDX 0 +#define LOG_HISTORY_IDX 1 +#define REVID_1 1 +#define REVID_2 2 + +static int valid_log_level(int level) +{ + return level == LOG_ERR || level == LOG_WARN || + level == LOG_INFO || level == LOG_VERB; +} + +static int valid_log_type(int type) +{ + return type == LOG_EXEC_IDX || type == LOG_HISTORY_IDX; +} + +static inline int valid_log_revid(int id) +{ + return id == REVID_1 || id == REVID_2; +} + +static void help(void) +{ + fprintf(stderr, + "usage: %s [OPTIONS]\n" + " code injection:\n" + " -l, --load\n" + " -s, --stage\n" + " -a, --activate\n" + " -u, --update [stage and activate]\n" + " -q, --query\n" + " -d, --revid update\n" + " telemetry:\n" + " -G, --getloginfo\n" + " -T, --type(0:execution, 1:history)\n" + " -L, --level(0, 1, 2, 4)\n" + " -R, --read\n" + " -D, --revid log\n", + progname); +} + +char *option_string = "l:sauqd:GT:L:RD:h"; +static struct option long_options[] = { + {"load", required_argument, 0, 'l'}, + {"stage", no_argument, 0, 's'}, + {"activate", no_argument, 0, 'a'}, + {"update", no_argument, 0, 'u'}, + {"query", no_argument, 0, 'q'}, + {"getloginfo", no_argument, 0, 'G'}, + {"type", required_argument, 0, 'T'}, + {"level", required_argument, 0, 'L'}, + {"read", no_argument, 0, 'R'}, + {"setrev", required_argument, 0, 'd'}, + {"setrevlog", required_argument, 0, 'D'}, + {"help", no_argument, 0, 'h'}, + {} +}; + +static void parse_options(int argc, char **argv) +{ + int option_index = 0; + char *pathname; + int opt; + + pathname = strdup(argv[0]); + progname = basename(pathname); + + while ((opt = getopt_long_only(argc, argv, option_string, + long_options, &option_index)) != -1) { + switch (opt) { + case 'l': + capsule_name = optarg; + break; + case 's': + action = 1; + break; + case 'a': + action = 2; + break; + case 'u': + action = 3; + break; + case 'q': + query_cap = 1; + break; + case 'G': + log_getinfo = 1; + break; + case 'T': + log_type = atoi(optarg); + set_log_type = 1; + break; + case 'L': + log_level = atoi(optarg); + set_log_level = 1; + break; + case 'R': + log_read = 1; + break; + case 'd': + revid = atoi(optarg); + set_revid = 1; + break; + case 'D': + log_revid = atoi(optarg); + set_log_revid = 1; + break; + case 'h': + help(); + exit(0); + default: + break; + } + } +} + +void print_cap(struct pfru_update_cap_info *cap) +{ + char *uuid; + + uuid = malloc(37); + if (!uuid) { + perror("Can not allocate uuid buffer\n"); + exit(1); + } + + uuid_unparse(cap->code_type, uuid); + printf("code injection image type:%s\n", uuid); + printf("fw_version:%d\n", cap->fw_version); + printf("code_rt_version:%d\n", cap->code_rt_version); + + uuid_unparse(cap->drv_type, uuid); + printf("driver update image type:%s\n", uuid); + printf("drv_rt_version:%d\n", cap->drv_rt_version); + printf("drv_svn:%d\n", cap->drv_svn); + + uuid_unparse(cap->platform_id, uuid); + printf("platform id:%s\n", uuid); + uuid_unparse(cap->oem_id, uuid); + printf("oem id:%s\n", uuid); + printf("oem information length:%d\n", cap->oem_info_len); + + free(uuid); +} + +int main(int argc, char *argv[]) +{ + int fd_update, fd_update_log, fd_capsule; + struct pfrt_log_data_info data_info; + struct pfrt_log_info info; + struct pfru_update_cap_info cap; + void *addr_map_capsule; + struct stat st; + char *log_buf; + int ret = 0; + + if (getuid() != 0) { + printf("Please run the tool as root - Exiting.\n"); + return 1; + } + + parse_options(argc, argv); + + fd_update = open("/dev/acpi_pfr_update0", O_RDWR); + if (fd_update < 0) { + printf("PFRU device not supported - Quit...\n"); + return 1; + } + + fd_update_log = open("/dev/acpi_pfr_telemetry0", O_RDWR); + if (fd_update_log < 0) { + printf("PFRT device not supported - Quit...\n"); + return 1; + } + + if (query_cap) { + ret = ioctl(fd_update, PFRU_IOC_QUERY_CAP, &cap); + if (ret) + perror("Query Update Capability info failed."); + else + print_cap(&cap); + + close(fd_update); + close(fd_update_log); + + return ret; + } + + if (log_getinfo) { + ret = ioctl(fd_update_log, PFRT_LOG_IOC_GET_DATA_INFO, &data_info); + if (ret) { + perror("Get telemetry data info failed."); + close(fd_update); + close(fd_update_log); + + return 1; + } + + ret = ioctl(fd_update_log, PFRT_LOG_IOC_GET_INFO, &info); + if (ret) { + perror("Get telemetry info failed."); + close(fd_update); + close(fd_update_log); + + return 1; + } + + printf("log_level:%d\n", info.log_level); + printf("log_type:%d\n", info.log_type); + printf("log_revid:%d\n", info.log_revid); + printf("max_data_size:%d\n", data_info.max_data_size); + printf("chunk1_size:%d\n", data_info.chunk1_size); + printf("chunk2_size:%d\n", data_info.chunk2_size); + printf("rollover_cnt:%d\n", data_info.rollover_cnt); + printf("reset_cnt:%d\n", data_info.reset_cnt); + + return 0; + } + + info.log_level = -1; + info.log_type = -1; + info.log_revid = -1; + + if (set_log_level) { + if (!valid_log_level(log_level)) { + printf("Invalid log level %d\n", + log_level); + } else { + info.log_level = log_level; + } + } + + if (set_log_type) { + if (!valid_log_type(log_type)) { + printf("Invalid log type %d\n", + log_type); + } else { + info.log_type = log_type; + } + } + + if (set_log_revid) { + if (!valid_log_revid(log_revid)) { + printf("Invalid log revid %d, unchanged.\n", + log_revid); + } else { + info.log_revid = log_revid; + } + } + + ret = ioctl(fd_update_log, PFRT_LOG_IOC_SET_INFO, &info); + if (ret) { + perror("Log information set failed.(log_level, log_type, log_revid)"); + close(fd_update); + close(fd_update_log); + + return 1; + } + + if (set_revid) { + ret = ioctl(fd_update, PFRU_IOC_SET_REV, &revid); + if (ret) { + perror("pfru update revid set failed"); + close(fd_update); + close(fd_update_log); + + return 1; + } + + printf("pfru update revid set to %d\n", revid); + } + + if (capsule_name) { + fd_capsule = open(capsule_name, O_RDONLY); + if (fd_capsule < 0) { + perror("Can not open capsule file..."); + close(fd_update); + close(fd_update_log); + + return 1; + } + + if (fstat(fd_capsule, &st) < 0) { + perror("Can not fstat capsule file..."); + close(fd_capsule); + close(fd_update); + close(fd_update_log); + + return 1; + } + + addr_map_capsule = mmap(NULL, st.st_size, PROT_READ, MAP_SHARED, + fd_capsule, 0); + if (addr_map_capsule == MAP_FAILED) { + perror("Failed to mmap capsule file."); + close(fd_capsule); + close(fd_update); + close(fd_update_log); + + return 1; + } + + ret = write(fd_update, (char *)addr_map_capsule, st.st_size); + printf("Load %d bytes of capsule file into the system\n", + ret); + + if (ret == -1) { + perror("Failed to load capsule file"); + close(fd_capsule); + close(fd_update); + close(fd_update_log); + + return 1; + } + + munmap(addr_map_capsule, st.st_size); + close(fd_capsule); + printf("Load done.\n"); + } + + if (action) { + if (action == 1) { + ret = ioctl(fd_update, PFRU_IOC_STAGE, NULL); + } else if (action == 2) { + ret = ioctl(fd_update, PFRU_IOC_ACTIVATE, NULL); + } else if (action == 3) { + ret = ioctl(fd_update, PFRU_IOC_STAGE_ACTIVATE, NULL); + } else { + close(fd_update); + close(fd_update_log); + + return 1; + } + printf("Update finished, return %d\n", ret); + } + + close(fd_update); + + if (log_read) { + void *p_mmap; + int max_data_sz; + + ret = ioctl(fd_update_log, PFRT_LOG_IOC_GET_DATA_INFO, &data_info); + if (ret) { + perror("Get telemetry data info failed."); + close(fd_update_log); + + return 1; + } + + max_data_sz = data_info.max_data_size; + if (!max_data_sz) { + printf("No telemetry data available.\n"); + close(fd_update_log); + + return 1; + } + + log_buf = malloc(max_data_sz + 1); + if (!log_buf) { + perror("log_buf allocate failed."); + close(fd_update_log); + + return 1; + } + + p_mmap = mmap(NULL, max_data_sz, PROT_READ, MAP_SHARED, fd_update_log, 0); + if (p_mmap == MAP_FAILED) { + perror("mmap error."); + close(fd_update_log); + + return 1; + } + + memcpy(log_buf, p_mmap, max_data_sz); + log_buf[max_data_sz] = '\0'; + printf("%s\n", log_buf); + free(log_buf); + + munmap(p_mmap, max_data_sz); + } + + close(fd_update_log); + + return 0; +} From ab571cbc098cd862397a73451f47b69ad581f35f Mon Sep 17 00:00:00 2001 From: Kevin Hilman Date: Wed, 3 Nov 2021 16:03:53 -0700 Subject: [PATCH 124/493] watchdog: Kconfig: enable MTK watchdog Enable CONFIG_MEDIATEK_WATCHDOG when ARCH_MEDIATEK is enabled. On some platforms (e.g. mt8183-pumpkin), watchdog is enabled by bootloader, so kernel driver needs to be enabled to avoid watchdog firing and causing reboot part way through kernel boot. Signed-off-by: Kevin Hilman Reviewed-by: Guenter Roeck Link: https://lore.kernel.org/r/20211103230354.915658-1-khilman@baylibre.com Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck --- drivers/watchdog/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/watchdog/Kconfig b/drivers/watchdog/Kconfig index 9d222ba17ec6..659064979a97 100644 --- a/drivers/watchdog/Kconfig +++ b/drivers/watchdog/Kconfig @@ -822,6 +822,7 @@ config MESON_WATCHDOG config MEDIATEK_WATCHDOG tristate "Mediatek SoCs watchdog support" depends on ARCH_MEDIATEK || COMPILE_TEST + default ARCH_MEDIATEK select WATCHDOG_CORE select RESET_CONTROLLER help From cea62f9fee0dae304d03def042f8a36f89dd337a Mon Sep 17 00:00:00 2001 From: AaeonIot Date: Wed, 17 Nov 2021 10:40:52 +0800 Subject: [PATCH 125/493] watchdog: f71808e_wdt: Add F81966 support This adds watchdog support the Fintek F81966 Super I/O chip. Testing was done on the Aaeon SSE-OPTI Signed-off-by: AaeonIot Signed-off-by: Chia-Lin Kao (AceLan) Reviewed-by: Guenter Roeck Link: https://lore.kernel.org/r/20211117024052.2427539-1-acelan.kao@canonical.com Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck --- drivers/watchdog/f71808e_wdt.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/watchdog/f71808e_wdt.c b/drivers/watchdog/f71808e_wdt.c index ee90c5f943f9..7f59c680de25 100644 --- a/drivers/watchdog/f71808e_wdt.c +++ b/drivers/watchdog/f71808e_wdt.c @@ -49,6 +49,7 @@ #define SIO_F81803_ID 0x1210 /* Chipset ID */ #define SIO_F81865_ID 0x0704 /* Chipset ID */ #define SIO_F81866_ID 0x1010 /* Chipset ID */ +#define SIO_F81966_ID 0x1502 /* F81804 chipset ID, same for f81966 */ #define F71808FG_REG_WDO_CONF 0xf0 #define F71808FG_REG_WDT_CONF 0xf5 @@ -105,7 +106,7 @@ MODULE_PARM_DESC(start_withtimeout, "Start watchdog timer on module load with" " given initial timeout. Zero (default) disables this feature."); enum chips { f71808fg, f71858fg, f71862fg, f71868, f71869, f71882fg, f71889fg, - f81803, f81865, f81866}; + f81803, f81865, f81866, f81966}; static const char * const fintek_wdt_names[] = { "f71808fg", @@ -118,6 +119,7 @@ static const char * const fintek_wdt_names[] = { "f81803", "f81865", "f81866", + "f81966" }; /* Super-I/O Function prototypes */ @@ -347,6 +349,7 @@ static int fintek_wdt_start(struct watchdog_device *wdd) break; case f81866: + case f81966: /* * GPIO1 Control Register when 27h BIT3:2 = 01 & BIT0 = 0. * The PIN 70(GPIO15/WDTRST) is controlled by 2Ch: @@ -373,7 +376,7 @@ static int fintek_wdt_start(struct watchdog_device *wdd) superio_select(wd->sioaddr, SIO_F71808FG_LD_WDT); superio_set_bit(wd->sioaddr, SIO_REG_ENABLE, 0); - if (wd->type == f81865 || wd->type == f81866) + if (wd->type == f81865 || wd->type == f81866 || wd->type == f81966) superio_set_bit(wd->sioaddr, F81865_REG_WDO_CONF, F81865_FLAG_WDOUT_EN); else @@ -580,6 +583,9 @@ static int __init fintek_wdt_find(int sioaddr) case SIO_F81866_ID: type = f81866; break; + case SIO_F81966_ID: + type = f81966; + break; default: pr_info("Unrecognized Fintek device: %04x\n", (unsigned int)devid); From 33950f9a36aca55c2b1e6062d9b29f3e97f91c40 Mon Sep 17 00:00:00 2001 From: Sam Protsenko Date: Sun, 21 Nov 2021 18:56:36 +0200 Subject: [PATCH 126/493] dt-bindings: watchdog: Require samsung,syscon-phandle for Exynos7 Exynos7 watchdog driver is clearly indicating that its dts node must define syscon phandle property. That was probably forgotten, so add it. Signed-off-by: Sam Protsenko Fixes: 2b9366b66967 ("watchdog: s3c2410_wdt: Add support for Watchdog device on Exynos7") Reviewed-by: Krzysztof Kozlowski Reviewed-by: Rob Herring Reviewed-by: Guenter Roeck Link: https://lore.kernel.org/r/20211107202943.8859-2-semen.protsenko@linaro.org Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck --- Documentation/devicetree/bindings/watchdog/samsung-wdt.yaml | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/Documentation/devicetree/bindings/watchdog/samsung-wdt.yaml b/Documentation/devicetree/bindings/watchdog/samsung-wdt.yaml index 76cb9586ee00..93cd77a6e92c 100644 --- a/Documentation/devicetree/bindings/watchdog/samsung-wdt.yaml +++ b/Documentation/devicetree/bindings/watchdog/samsung-wdt.yaml @@ -39,8 +39,8 @@ properties: samsung,syscon-phandle: $ref: /schemas/types.yaml#/definitions/phandle description: - Phandle to the PMU system controller node (in case of Exynos5250 - and Exynos5420). + Phandle to the PMU system controller node (in case of Exynos5250, + Exynos5420 and Exynos7). required: - compatible @@ -58,6 +58,7 @@ allOf: enum: - samsung,exynos5250-wdt - samsung,exynos5420-wdt + - samsung,exynos7-wdt then: required: - samsung,syscon-phandle From 0b595831c2c8f81c252818d3575c689741a7efeb Mon Sep 17 00:00:00 2001 From: Sam Protsenko Date: Sun, 21 Nov 2021 18:56:37 +0200 Subject: [PATCH 127/493] dt-bindings: watchdog: Document Exynos850 watchdog bindings Exynos850 SoC has two CPU clusters: - cluster 0: contains CPUs #0, #1, #2, #3 - cluster 1: contains CPUs #4, #5, #6, #7 Each cluster has its own dedicated watchdog timer. Those WDT instances are controlled using different bits in PMU registers, new "samsung,index" property is added to tell the driver which bits to use for defined watchdog node. Also on Exynos850 the peripheral clock and the source clock are two different clocks. Provide a way to specify two clocks in watchdog device tree node. Signed-off-by: Sam Protsenko Reviewed-by: Krzysztof Kozlowski Reviewed-by: Rob Herring Reviewed-by: Guenter Roeck Link: https://lore.kernel.org/r/20211107202943.8859-3-semen.protsenko@linaro.org Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck --- .../bindings/watchdog/samsung-wdt.yaml | 45 +++++++++++++++++-- 1 file changed, 41 insertions(+), 4 deletions(-) diff --git a/Documentation/devicetree/bindings/watchdog/samsung-wdt.yaml b/Documentation/devicetree/bindings/watchdog/samsung-wdt.yaml index 93cd77a6e92c..b08373336b16 100644 --- a/Documentation/devicetree/bindings/watchdog/samsung-wdt.yaml +++ b/Documentation/devicetree/bindings/watchdog/samsung-wdt.yaml @@ -22,25 +22,32 @@ properties: - samsung,exynos5250-wdt # for Exynos5250 - samsung,exynos5420-wdt # for Exynos5420 - samsung,exynos7-wdt # for Exynos7 + - samsung,exynos850-wdt # for Exynos850 reg: maxItems: 1 clocks: - maxItems: 1 + minItems: 1 + maxItems: 2 clock-names: - items: - - const: watchdog + minItems: 1 + maxItems: 2 interrupts: maxItems: 1 + samsung,cluster-index: + $ref: /schemas/types.yaml#/definitions/uint32 + description: + Index of CPU cluster on which watchdog is running (in case of Exynos850) + samsung,syscon-phandle: $ref: /schemas/types.yaml#/definitions/phandle description: Phandle to the PMU system controller node (in case of Exynos5250, - Exynos5420 and Exynos7). + Exynos5420, Exynos7 and Exynos850). required: - compatible @@ -59,9 +66,39 @@ allOf: - samsung,exynos5250-wdt - samsung,exynos5420-wdt - samsung,exynos7-wdt + - samsung,exynos850-wdt then: required: - samsung,syscon-phandle + - if: + properties: + compatible: + contains: + enum: + - samsung,exynos850-wdt + then: + properties: + clocks: + items: + - description: Bus clock, used for register interface + - description: Source clock (driving watchdog counter) + clock-names: + items: + - const: watchdog + - const: watchdog_src + samsung,cluster-index: + enum: [0, 1] + required: + - samsung,cluster-index + else: + properties: + clocks: + items: + - description: Bus clock, which is also a source clock + clock-names: + items: + - const: watchdog + samsung,cluster-index: false unevaluatedProperties: false From f197d47584be621d634ad7fdfad3e62c8c13ce24 Mon Sep 17 00:00:00 2001 From: Sam Protsenko Date: Sun, 21 Nov 2021 18:56:38 +0200 Subject: [PATCH 128/493] watchdog: s3c2410: Fail probe if can't find valid timeout Driver can't work properly if there no valid timeout was found in s3c2410wdt_set_heartbeat(). Ideally, that function should be reworked in a way that it's always able to find some valid timeout. As a temporary solution let's for now just fail the driver probe in case the valid timeout can't be found in s3c2410wdt_set_heartbeat() function. Signed-off-by: Sam Protsenko Reported-by: Guenter Roeck Suggested-by: Guenter Roeck Reviewed-by: Krzysztof Kozlowski Reviewed-by: Guenter Roeck Link: https://lore.kernel.org/r/20211107202943.8859-4-semen.protsenko@linaro.org Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck --- drivers/watchdog/s3c2410_wdt.c | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/drivers/watchdog/s3c2410_wdt.c b/drivers/watchdog/s3c2410_wdt.c index 2395f353e52d..00421cf22556 100644 --- a/drivers/watchdog/s3c2410_wdt.c +++ b/drivers/watchdog/s3c2410_wdt.c @@ -515,7 +515,6 @@ static int s3c2410wdt_probe(struct platform_device *pdev) struct s3c2410_wdt *wdt; struct resource *wdt_irq; unsigned int wtcon; - int started = 0; int ret; wdt = devm_kzalloc(dev, sizeof(*wdt), GFP_KERNEL); @@ -581,15 +580,15 @@ static int s3c2410wdt_probe(struct platform_device *pdev) ret = s3c2410wdt_set_heartbeat(&wdt->wdt_device, wdt->wdt_device.timeout); if (ret) { - started = s3c2410wdt_set_heartbeat(&wdt->wdt_device, - S3C2410_WATCHDOG_DEFAULT_TIME); - - if (started == 0) - dev_info(dev, - "tmr_margin value out of range, default %d used\n", + ret = s3c2410wdt_set_heartbeat(&wdt->wdt_device, + S3C2410_WATCHDOG_DEFAULT_TIME); + if (ret == 0) { + dev_warn(dev, "tmr_margin value out of range, default %d used\n", S3C2410_WATCHDOG_DEFAULT_TIME); - else - dev_info(dev, "default timer value is out of range, cannot start\n"); + } else { + dev_err(dev, "failed to use default timeout\n"); + goto err_cpufreq; + } } ret = devm_request_irq(dev, wdt_irq->start, s3c2410wdt_irq, 0, @@ -613,10 +612,10 @@ static int s3c2410wdt_probe(struct platform_device *pdev) if (ret < 0) goto err_unregister; - if (tmr_atboot && started == 0) { + if (tmr_atboot) { dev_info(dev, "starting watchdog timer\n"); s3c2410wdt_start(&wdt->wdt_device); - } else if (!tmr_atboot) { + } else { /* if we're not enabling the watchdog, then ensure it is * disabled if it has been left running from the bootloader * or other source */ From a90102e358ee336b96e2447104f47dee7a347aac Mon Sep 17 00:00:00 2001 From: Sam Protsenko Date: Sun, 21 Nov 2021 18:56:39 +0200 Subject: [PATCH 129/493] watchdog: s3c2410: Let kernel kick watchdog When "tmr_atboot" module param is set, the watchdog is started in driver's probe. In that case, also set WDOG_HW_RUNNING bit to let watchdog core driver know it's running. This way watchdog core can kick the watchdog for us (if CONFIG_WATCHDOG_HANDLE_BOOT_ENABLED option is enabled), until user space takes control. WDOG_HW_RUNNING bit must be set before registering the watchdog. So the "tmr_atboot" handling code is moved before watchdog registration, to avoid performing the same check twice. This is also logical because WDOG_HW_RUNNING bit makes WDT core expect actually running watchdog. Signed-off-by: Sam Protsenko Reviewed-by: Krzysztof Kozlowski Reviewed-by: Guenter Roeck Link: https://lore.kernel.org/r/20211107202943.8859-5-semen.protsenko@linaro.org Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck --- drivers/watchdog/s3c2410_wdt.c | 26 +++++++++++++++----------- 1 file changed, 15 insertions(+), 11 deletions(-) diff --git a/drivers/watchdog/s3c2410_wdt.c b/drivers/watchdog/s3c2410_wdt.c index 00421cf22556..0845c05034a1 100644 --- a/drivers/watchdog/s3c2410_wdt.c +++ b/drivers/watchdog/s3c2410_wdt.c @@ -604,6 +604,21 @@ static int s3c2410wdt_probe(struct platform_device *pdev) wdt->wdt_device.bootstatus = s3c2410wdt_get_bootstatus(wdt); wdt->wdt_device.parent = dev; + /* + * If "tmr_atboot" param is non-zero, start the watchdog right now. Also + * set WDOG_HW_RUNNING bit, so that watchdog core can kick the watchdog. + * + * If we're not enabling the watchdog, then ensure it is disabled if it + * has been left running from the bootloader or other source. + */ + if (tmr_atboot) { + dev_info(dev, "starting watchdog timer\n"); + s3c2410wdt_start(&wdt->wdt_device); + set_bit(WDOG_HW_RUNNING, &wdt->wdt_device.status); + } else { + s3c2410wdt_stop(&wdt->wdt_device); + } + ret = watchdog_register_device(&wdt->wdt_device); if (ret) goto err_cpufreq; @@ -612,17 +627,6 @@ static int s3c2410wdt_probe(struct platform_device *pdev) if (ret < 0) goto err_unregister; - if (tmr_atboot) { - dev_info(dev, "starting watchdog timer\n"); - s3c2410wdt_start(&wdt->wdt_device); - } else { - /* if we're not enabling the watchdog, then ensure it is - * disabled if it has been left running from the bootloader - * or other source */ - - s3c2410wdt_stop(&wdt->wdt_device); - } - platform_set_drvdata(pdev, wdt); /* print out a statement of readiness */ From 8d9fdf60e37c7752931738c1dc202b4c6066f0d2 Mon Sep 17 00:00:00 2001 From: Sam Protsenko Date: Sun, 21 Nov 2021 18:56:40 +0200 Subject: [PATCH 130/493] watchdog: s3c2410: Make reset disable register optional On new Exynos chips (e.g. Exynos850 and Exynos9) the AUTOMATIC_WDT_RESET_DISABLE register was removed, and its value can be thought of as "always 0x0". Add correspondig quirk bit, so that the driver can omit accessing it if it's not present. This commit doesn't bring any functional change to existing devices, but merely provides an infrastructure for upcoming chips support. Signed-off-by: Sam Protsenko Reviewed-by: Krzysztof Kozlowski Reviewed-by: Guenter Roeck Link: https://lore.kernel.org/r/20211107202943.8859-6-semen.protsenko@linaro.org Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck --- drivers/watchdog/s3c2410_wdt.c | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/drivers/watchdog/s3c2410_wdt.c b/drivers/watchdog/s3c2410_wdt.c index 0845c05034a1..2cc4923a98a5 100644 --- a/drivers/watchdog/s3c2410_wdt.c +++ b/drivers/watchdog/s3c2410_wdt.c @@ -59,10 +59,12 @@ #define QUIRK_HAS_PMU_CONFIG (1 << 0) #define QUIRK_HAS_RST_STAT (1 << 1) #define QUIRK_HAS_WTCLRINT_REG (1 << 2) +#define QUIRK_HAS_PMU_AUTO_DISABLE (1 << 3) /* These quirks require that we have a PMU register map */ #define QUIRKS_HAVE_PMUREG (QUIRK_HAS_PMU_CONFIG | \ - QUIRK_HAS_RST_STAT) + QUIRK_HAS_RST_STAT | \ + QUIRK_HAS_PMU_AUTO_DISABLE) static bool nowayout = WATCHDOG_NOWAYOUT; static int tmr_margin; @@ -137,7 +139,7 @@ static const struct s3c2410_wdt_variant drv_data_exynos5250 = { .rst_stat_reg = EXYNOS5_RST_STAT_REG_OFFSET, .rst_stat_bit = 20, .quirks = QUIRK_HAS_PMU_CONFIG | QUIRK_HAS_RST_STAT \ - | QUIRK_HAS_WTCLRINT_REG, + | QUIRK_HAS_WTCLRINT_REG | QUIRK_HAS_PMU_AUTO_DISABLE, }; static const struct s3c2410_wdt_variant drv_data_exynos5420 = { @@ -147,7 +149,7 @@ static const struct s3c2410_wdt_variant drv_data_exynos5420 = { .rst_stat_reg = EXYNOS5_RST_STAT_REG_OFFSET, .rst_stat_bit = 9, .quirks = QUIRK_HAS_PMU_CONFIG | QUIRK_HAS_RST_STAT \ - | QUIRK_HAS_WTCLRINT_REG, + | QUIRK_HAS_WTCLRINT_REG | QUIRK_HAS_PMU_AUTO_DISABLE, }; static const struct s3c2410_wdt_variant drv_data_exynos7 = { @@ -157,7 +159,7 @@ static const struct s3c2410_wdt_variant drv_data_exynos7 = { .rst_stat_reg = EXYNOS5_RST_STAT_REG_OFFSET, .rst_stat_bit = 23, /* A57 WDTRESET */ .quirks = QUIRK_HAS_PMU_CONFIG | QUIRK_HAS_RST_STAT \ - | QUIRK_HAS_WTCLRINT_REG, + | QUIRK_HAS_WTCLRINT_REG | QUIRK_HAS_PMU_AUTO_DISABLE, }; static const struct of_device_id s3c2410_wdt_match[] = { @@ -213,11 +215,13 @@ static int s3c2410wdt_mask_and_disable_reset(struct s3c2410_wdt *wdt, bool mask) if (mask) val = mask_val; - ret = regmap_update_bits(wdt->pmureg, - wdt->drv_data->disable_reg, - mask_val, val); - if (ret < 0) - goto error; + if (wdt->drv_data->quirks & QUIRK_HAS_PMU_AUTO_DISABLE) { + ret = regmap_update_bits(wdt->pmureg, + wdt->drv_data->disable_reg, mask_val, + val); + if (ret < 0) + goto error; + } ret = regmap_update_bits(wdt->pmureg, wdt->drv_data->mask_reset_reg, From 2bd33bb4bc1cdb34b6781f6c1fc1ad475d0ad55b Mon Sep 17 00:00:00 2001 From: Sam Protsenko Date: Sun, 21 Nov 2021 18:56:41 +0200 Subject: [PATCH 131/493] watchdog: s3c2410: Extract disable and mask code into separate functions The s3c2410wdt_mask_and_disable_reset() function content is bound to be changed further. Prepare it for upcoming changes by splitting into separate "mask reset" and "disable reset" functions. But keep s3c2410wdt_mask_and_disable_reset() function present as a facade. This commit doesn't bring any functional change to existing devices, but merely provides an infrastructure for upcoming chips support. Signed-off-by: Sam Protsenko Reviewed-by: Krzysztof Kozlowski Reviewed-by: Guenter Roeck Link: https://lore.kernel.org/r/20211107202943.8859-7-semen.protsenko@linaro.org Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck --- drivers/watchdog/s3c2410_wdt.c | 60 +++++++++++++++++++++------------- 1 file changed, 38 insertions(+), 22 deletions(-) diff --git a/drivers/watchdog/s3c2410_wdt.c b/drivers/watchdog/s3c2410_wdt.c index 2cc4923a98a5..4ac0a30e835e 100644 --- a/drivers/watchdog/s3c2410_wdt.c +++ b/drivers/watchdog/s3c2410_wdt.c @@ -202,37 +202,53 @@ static inline struct s3c2410_wdt *freq_to_wdt(struct notifier_block *nb) return container_of(nb, struct s3c2410_wdt, freq_transition); } -static int s3c2410wdt_mask_and_disable_reset(struct s3c2410_wdt *wdt, bool mask) +static int s3c2410wdt_disable_wdt_reset(struct s3c2410_wdt *wdt, bool mask) { + const u32 mask_val = BIT(wdt->drv_data->mask_bit); + const u32 val = mask ? mask_val : 0; int ret; - u32 mask_val = 1 << wdt->drv_data->mask_bit; - u32 val = 0; - /* No need to do anything if no PMU CONFIG needed */ - if (!(wdt->drv_data->quirks & QUIRK_HAS_PMU_CONFIG)) - return 0; - - if (mask) - val = mask_val; - - if (wdt->drv_data->quirks & QUIRK_HAS_PMU_AUTO_DISABLE) { - ret = regmap_update_bits(wdt->pmureg, - wdt->drv_data->disable_reg, mask_val, - val); - if (ret < 0) - goto error; - } - - ret = regmap_update_bits(wdt->pmureg, - wdt->drv_data->mask_reset_reg, - mask_val, val); - error: + ret = regmap_update_bits(wdt->pmureg, wdt->drv_data->disable_reg, + mask_val, val); if (ret < 0) dev_err(wdt->dev, "failed to update reg(%d)\n", ret); return ret; } +static int s3c2410wdt_mask_wdt_reset(struct s3c2410_wdt *wdt, bool mask) +{ + const u32 mask_val = BIT(wdt->drv_data->mask_bit); + const u32 val = mask ? mask_val : 0; + int ret; + + ret = regmap_update_bits(wdt->pmureg, wdt->drv_data->mask_reset_reg, + mask_val, val); + if (ret < 0) + dev_err(wdt->dev, "failed to update reg(%d)\n", ret); + + return ret; +} + +static int s3c2410wdt_mask_and_disable_reset(struct s3c2410_wdt *wdt, bool mask) +{ + int ret; + + if (wdt->drv_data->quirks & QUIRK_HAS_PMU_AUTO_DISABLE) { + ret = s3c2410wdt_disable_wdt_reset(wdt, mask); + if (ret < 0) + return ret; + } + + if (wdt->drv_data->quirks & QUIRK_HAS_PMU_CONFIG) { + ret = s3c2410wdt_mask_wdt_reset(wdt, mask); + if (ret < 0) + return ret; + } + + return 0; +} + static int s3c2410wdt_keepalive(struct watchdog_device *wdd) { struct s3c2410_wdt *wdt = watchdog_get_drvdata(wdd); From 370bc7f50f475711c970c8e88b2f4b29b53b5791 Mon Sep 17 00:00:00 2001 From: Sam Protsenko Date: Sun, 21 Nov 2021 18:56:42 +0200 Subject: [PATCH 132/493] watchdog: s3c2410: Implement a way to invert mask reg value On new Exynos chips (like Exynos850) the MASK_WDT_RESET_REQUEST register is replaced with CLUSTERx_NONCPU_INT_EN, and its mask bit value meaning was reversed: for new register the bit value "1" means "Interrupt enabled", while for MASK_WDT_RESET_REQUEST register "1" means "Mask the interrupt" (i.e. "Interrupt disabled"). Introduce "mask_reset_inv" boolean field in driver data structure; when that field is "true", mask register handling function will invert the value before setting it to the register. This commit doesn't bring any functional change to existing devices, but merely provides an infrastructure for upcoming chips support. Signed-off-by: Sam Protsenko Reviewed-by: Krzysztof Kozlowski Reviewed-by: Guenter Roeck Link: https://lore.kernel.org/r/20211107202943.8859-8-semen.protsenko@linaro.org Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck --- drivers/watchdog/s3c2410_wdt.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/watchdog/s3c2410_wdt.c b/drivers/watchdog/s3c2410_wdt.c index 4ac0a30e835e..2a61b6ea5602 100644 --- a/drivers/watchdog/s3c2410_wdt.c +++ b/drivers/watchdog/s3c2410_wdt.c @@ -92,6 +92,7 @@ MODULE_PARM_DESC(soft_noboot, "Watchdog action, set to 1 to ignore reboots, 0 to * timer reset functionality. * @mask_reset_reg: Offset in pmureg for the register that masks the watchdog * timer reset functionality. + * @mask_reset_inv: If set, mask_reset_reg value will have inverted meaning. * @mask_bit: Bit number for the watchdog timer in the disable register and the * mask reset register. * @rst_stat_reg: Offset in pmureg for the register that has the reset status. @@ -103,6 +104,7 @@ MODULE_PARM_DESC(soft_noboot, "Watchdog action, set to 1 to ignore reboots, 0 to struct s3c2410_wdt_variant { int disable_reg; int mask_reset_reg; + bool mask_reset_inv; int mask_bit; int rst_stat_reg; int rst_stat_bit; @@ -219,7 +221,8 @@ static int s3c2410wdt_disable_wdt_reset(struct s3c2410_wdt *wdt, bool mask) static int s3c2410wdt_mask_wdt_reset(struct s3c2410_wdt *wdt, bool mask) { const u32 mask_val = BIT(wdt->drv_data->mask_bit); - const u32 val = mask ? mask_val : 0; + const bool val_inv = wdt->drv_data->mask_reset_inv; + const u32 val = (mask ^ val_inv) ? mask_val : 0; int ret; ret = regmap_update_bits(wdt->pmureg, wdt->drv_data->mask_reset_reg, From aa220bc6b7581eb1ac2eb98a8d002af95d5d8c8d Mon Sep 17 00:00:00 2001 From: Sam Protsenko Date: Sun, 21 Nov 2021 18:56:43 +0200 Subject: [PATCH 133/493] watchdog: s3c2410: Add support for WDT counter enable register On new Exynos chips (e.g. Exynos850) new CLUSTERx_NONCPU_OUT register is introduced, where CNT_EN_WDT bit must be enabled to make watchdog counter running. Add corresponding quirk and proper infrastructure to handle that register if the quirk is set. This commit doesn't bring any functional change to existing devices, but merely provides an infrastructure for upcoming chips support. Signed-off-by: Sam Protsenko Reviewed-by: Krzysztof Kozlowski Reviewed-by: Guenter Roeck Link: https://lore.kernel.org/r/20211107202943.8859-9-semen.protsenko@linaro.org Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck --- drivers/watchdog/s3c2410_wdt.c | 28 +++++++++++++++++++++++++++- 1 file changed, 27 insertions(+), 1 deletion(-) diff --git a/drivers/watchdog/s3c2410_wdt.c b/drivers/watchdog/s3c2410_wdt.c index 2a61b6ea5602..ec341c876225 100644 --- a/drivers/watchdog/s3c2410_wdt.c +++ b/drivers/watchdog/s3c2410_wdt.c @@ -60,11 +60,13 @@ #define QUIRK_HAS_RST_STAT (1 << 1) #define QUIRK_HAS_WTCLRINT_REG (1 << 2) #define QUIRK_HAS_PMU_AUTO_DISABLE (1 << 3) +#define QUIRK_HAS_PMU_CNT_EN (1 << 4) /* These quirks require that we have a PMU register map */ #define QUIRKS_HAVE_PMUREG (QUIRK_HAS_PMU_CONFIG | \ QUIRK_HAS_RST_STAT | \ - QUIRK_HAS_PMU_AUTO_DISABLE) + QUIRK_HAS_PMU_AUTO_DISABLE | \ + QUIRK_HAS_PMU_CNT_EN) static bool nowayout = WATCHDOG_NOWAYOUT; static int tmr_margin; @@ -98,6 +100,8 @@ MODULE_PARM_DESC(soft_noboot, "Watchdog action, set to 1 to ignore reboots, 0 to * @rst_stat_reg: Offset in pmureg for the register that has the reset status. * @rst_stat_bit: Bit number in the rst_stat register indicating a watchdog * reset. + * @cnt_en_reg: Offset in pmureg for the register that enables WDT counter. + * @cnt_en_bit: Bit number for "watchdog counter enable" in cnt_en register. * @quirks: A bitfield of quirks. */ @@ -108,6 +112,8 @@ struct s3c2410_wdt_variant { int mask_bit; int rst_stat_reg; int rst_stat_bit; + int cnt_en_reg; + int cnt_en_bit; u32 quirks; }; @@ -233,6 +239,20 @@ static int s3c2410wdt_mask_wdt_reset(struct s3c2410_wdt *wdt, bool mask) return ret; } +static int s3c2410wdt_enable_counter(struct s3c2410_wdt *wdt, bool en) +{ + const u32 mask_val = BIT(wdt->drv_data->cnt_en_bit); + const u32 val = en ? mask_val : 0; + int ret; + + ret = regmap_update_bits(wdt->pmureg, wdt->drv_data->cnt_en_reg, + mask_val, val); + if (ret < 0) + dev_err(wdt->dev, "failed to update reg(%d)\n", ret); + + return ret; +} + static int s3c2410wdt_mask_and_disable_reset(struct s3c2410_wdt *wdt, bool mask) { int ret; @@ -249,6 +269,12 @@ static int s3c2410wdt_mask_and_disable_reset(struct s3c2410_wdt *wdt, bool mask) return ret; } + if (wdt->drv_data->quirks & QUIRK_HAS_PMU_CNT_EN) { + ret = s3c2410wdt_enable_counter(wdt, !mask); + if (ret < 0) + return ret; + } + return 0; } From cf3fad4e62d363e2e79aed8b7af4eb5bec905df0 Mon Sep 17 00:00:00 2001 From: Sam Protsenko Date: Wed, 24 Nov 2021 01:26:13 +0200 Subject: [PATCH 134/493] watchdog: s3c2410: Cleanup PMU related code Now that PMU enablement code was extended for new Exynos SoCs, it doesn't look very cohesive and consistent anymore. Do a bit of renaming, grouping and style changes, to make it look good again. While at it, add quirks documentation as well. No functional change, just a refactoring commit. Signed-off-by: Sam Protsenko Reviewed-by: Krzysztof Kozlowski Reviewed-by: Guenter Roeck Link: https://lore.kernel.org/r/20211123232613.22438-1-semen.protsenko@linaro.org Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck --- drivers/watchdog/s3c2410_wdt.c | 83 ++++++++++++++++++++++++---------- 1 file changed, 58 insertions(+), 25 deletions(-) diff --git a/drivers/watchdog/s3c2410_wdt.c b/drivers/watchdog/s3c2410_wdt.c index ec341c876225..df67d57ea7e4 100644 --- a/drivers/watchdog/s3c2410_wdt.c +++ b/drivers/watchdog/s3c2410_wdt.c @@ -56,17 +56,51 @@ #define EXYNOS5_RST_STAT_REG_OFFSET 0x0404 #define EXYNOS5_WDT_DISABLE_REG_OFFSET 0x0408 #define EXYNOS5_WDT_MASK_RESET_REG_OFFSET 0x040c -#define QUIRK_HAS_PMU_CONFIG (1 << 0) -#define QUIRK_HAS_RST_STAT (1 << 1) -#define QUIRK_HAS_WTCLRINT_REG (1 << 2) + +/** + * DOC: Quirk flags for different Samsung watchdog IP-cores + * + * This driver supports multiple Samsung SoCs, each of which might have + * different set of registers and features supported. As watchdog block + * sometimes requires modifying PMU registers for proper functioning, register + * differences in both watchdog and PMU IP-cores should be accounted for. Quirk + * flags described below serve the purpose of telling the driver about mentioned + * SoC traits, and can be specified in driver data for each particular supported + * device. + * + * %QUIRK_HAS_WTCLRINT_REG: Watchdog block has WTCLRINT register. It's used to + * clear the interrupt once the interrupt service routine is complete. It's + * write-only, writing any values to this register clears the interrupt, but + * reading is not permitted. + * + * %QUIRK_HAS_PMU_MASK_RESET: PMU block has the register for disabling/enabling + * WDT reset request. On old SoCs it's usually called MASK_WDT_RESET_REQUEST, + * new SoCs have CLUSTERx_NONCPU_INT_EN register, which 'mask_bit' value is + * inverted compared to the former one. + * + * %QUIRK_HAS_PMU_RST_STAT: PMU block has RST_STAT (reset status) register, + * which contains bits indicating the reason for most recent CPU reset. If + * present, driver will use this register to check if previous reboot was due to + * watchdog timer reset. + * + * %QUIRK_HAS_PMU_AUTO_DISABLE: PMU block has AUTOMATIC_WDT_RESET_DISABLE + * register. If 'mask_bit' bit is set, PMU will disable WDT reset when + * corresponding processor is in reset state. + * + * %QUIRK_HAS_PMU_CNT_EN: PMU block has some register (e.g. CLUSTERx_NONCPU_OUT) + * with "watchdog counter enable" bit. That bit should be set to make watchdog + * counter running. + */ +#define QUIRK_HAS_WTCLRINT_REG (1 << 0) +#define QUIRK_HAS_PMU_MASK_RESET (1 << 1) +#define QUIRK_HAS_PMU_RST_STAT (1 << 2) #define QUIRK_HAS_PMU_AUTO_DISABLE (1 << 3) #define QUIRK_HAS_PMU_CNT_EN (1 << 4) /* These quirks require that we have a PMU register map */ -#define QUIRKS_HAVE_PMUREG (QUIRK_HAS_PMU_CONFIG | \ - QUIRK_HAS_RST_STAT | \ - QUIRK_HAS_PMU_AUTO_DISABLE | \ - QUIRK_HAS_PMU_CNT_EN) +#define QUIRKS_HAVE_PMUREG \ + (QUIRK_HAS_PMU_MASK_RESET | QUIRK_HAS_PMU_RST_STAT | \ + QUIRK_HAS_PMU_AUTO_DISABLE | QUIRK_HAS_PMU_CNT_EN) static bool nowayout = WATCHDOG_NOWAYOUT; static int tmr_margin; @@ -146,8 +180,8 @@ static const struct s3c2410_wdt_variant drv_data_exynos5250 = { .mask_bit = 20, .rst_stat_reg = EXYNOS5_RST_STAT_REG_OFFSET, .rst_stat_bit = 20, - .quirks = QUIRK_HAS_PMU_CONFIG | QUIRK_HAS_RST_STAT \ - | QUIRK_HAS_WTCLRINT_REG | QUIRK_HAS_PMU_AUTO_DISABLE, + .quirks = QUIRK_HAS_WTCLRINT_REG | QUIRK_HAS_PMU_MASK_RESET | \ + QUIRK_HAS_PMU_RST_STAT | QUIRK_HAS_PMU_AUTO_DISABLE, }; static const struct s3c2410_wdt_variant drv_data_exynos5420 = { @@ -156,8 +190,8 @@ static const struct s3c2410_wdt_variant drv_data_exynos5420 = { .mask_bit = 0, .rst_stat_reg = EXYNOS5_RST_STAT_REG_OFFSET, .rst_stat_bit = 9, - .quirks = QUIRK_HAS_PMU_CONFIG | QUIRK_HAS_RST_STAT \ - | QUIRK_HAS_WTCLRINT_REG | QUIRK_HAS_PMU_AUTO_DISABLE, + .quirks = QUIRK_HAS_WTCLRINT_REG | QUIRK_HAS_PMU_MASK_RESET | \ + QUIRK_HAS_PMU_RST_STAT | QUIRK_HAS_PMU_AUTO_DISABLE, }; static const struct s3c2410_wdt_variant drv_data_exynos7 = { @@ -166,8 +200,8 @@ static const struct s3c2410_wdt_variant drv_data_exynos7 = { .mask_bit = 23, .rst_stat_reg = EXYNOS5_RST_STAT_REG_OFFSET, .rst_stat_bit = 23, /* A57 WDTRESET */ - .quirks = QUIRK_HAS_PMU_CONFIG | QUIRK_HAS_RST_STAT \ - | QUIRK_HAS_WTCLRINT_REG | QUIRK_HAS_PMU_AUTO_DISABLE, + .quirks = QUIRK_HAS_WTCLRINT_REG | QUIRK_HAS_PMU_MASK_RESET | \ + QUIRK_HAS_PMU_RST_STAT | QUIRK_HAS_PMU_AUTO_DISABLE, }; static const struct of_device_id s3c2410_wdt_match[] = { @@ -253,24 +287,24 @@ static int s3c2410wdt_enable_counter(struct s3c2410_wdt *wdt, bool en) return ret; } -static int s3c2410wdt_mask_and_disable_reset(struct s3c2410_wdt *wdt, bool mask) +static int s3c2410wdt_enable(struct s3c2410_wdt *wdt, bool en) { int ret; if (wdt->drv_data->quirks & QUIRK_HAS_PMU_AUTO_DISABLE) { - ret = s3c2410wdt_disable_wdt_reset(wdt, mask); + ret = s3c2410wdt_disable_wdt_reset(wdt, !en); if (ret < 0) return ret; } - if (wdt->drv_data->quirks & QUIRK_HAS_PMU_CONFIG) { - ret = s3c2410wdt_mask_wdt_reset(wdt, mask); + if (wdt->drv_data->quirks & QUIRK_HAS_PMU_MASK_RESET) { + ret = s3c2410wdt_mask_wdt_reset(wdt, !en); if (ret < 0) return ret; } if (wdt->drv_data->quirks & QUIRK_HAS_PMU_CNT_EN) { - ret = s3c2410wdt_enable_counter(wdt, !mask); + ret = s3c2410wdt_enable_counter(wdt, en); if (ret < 0) return ret; } @@ -531,7 +565,7 @@ static inline unsigned int s3c2410wdt_get_bootstatus(struct s3c2410_wdt *wdt) unsigned int rst_stat; int ret; - if (!(wdt->drv_data->quirks & QUIRK_HAS_RST_STAT)) + if (!(wdt->drv_data->quirks & QUIRK_HAS_PMU_RST_STAT)) return 0; ret = regmap_read(wdt->pmureg, wdt->drv_data->rst_stat_reg, &rst_stat); @@ -672,7 +706,7 @@ static int s3c2410wdt_probe(struct platform_device *pdev) if (ret) goto err_cpufreq; - ret = s3c2410wdt_mask_and_disable_reset(wdt, false); + ret = s3c2410wdt_enable(wdt, true); if (ret < 0) goto err_unregister; @@ -707,7 +741,7 @@ static int s3c2410wdt_remove(struct platform_device *dev) int ret; struct s3c2410_wdt *wdt = platform_get_drvdata(dev); - ret = s3c2410wdt_mask_and_disable_reset(wdt, true); + ret = s3c2410wdt_enable(wdt, false); if (ret < 0) return ret; @@ -724,8 +758,7 @@ static void s3c2410wdt_shutdown(struct platform_device *dev) { struct s3c2410_wdt *wdt = platform_get_drvdata(dev); - s3c2410wdt_mask_and_disable_reset(wdt, true); - + s3c2410wdt_enable(wdt, false); s3c2410wdt_stop(&wdt->wdt_device); } @@ -740,7 +773,7 @@ static int s3c2410wdt_suspend(struct device *dev) wdt->wtcon_save = readl(wdt->reg_base + S3C2410_WTCON); wdt->wtdat_save = readl(wdt->reg_base + S3C2410_WTDAT); - ret = s3c2410wdt_mask_and_disable_reset(wdt, true); + ret = s3c2410wdt_enable(wdt, false); if (ret < 0) return ret; @@ -760,7 +793,7 @@ static int s3c2410wdt_resume(struct device *dev) writel(wdt->wtdat_save, wdt->reg_base + S3C2410_WTCNT);/* Reset count */ writel(wdt->wtcon_save, wdt->reg_base + S3C2410_WTCON); - ret = s3c2410wdt_mask_and_disable_reset(wdt, false); + ret = s3c2410wdt_enable(wdt, true); if (ret < 0) return ret; From e249d01b5e8b8263ee2fdb787c954450940a7677 Mon Sep 17 00:00:00 2001 From: Sam Protsenko Date: Sun, 21 Nov 2021 18:56:45 +0200 Subject: [PATCH 135/493] watchdog: s3c2410: Support separate source clock Right now all devices supported in the driver have the single clock: it acts simultaneously as a bus clock (providing register interface clocking) and source clock (driving watchdog counter). Some newer Exynos chips, like Exynos850, have two separate clocks for that. In that case two clocks will be passed to the driver from the resource provider, e.g. Device Tree. Provide necessary infrastructure to support that case: - use source clock's rate for all timer related calculations - use bus clock to gate/ungate the register interface All devices that use the single clock are kept intact: if only one clock is passed from Device Tree, it will be used for both purposes as before. Signed-off-by: Sam Protsenko Reviewed-by: Krzysztof Kozlowski Reviewed-by: Guenter Roeck Link: https://lore.kernel.org/r/20211107202943.8859-11-semen.protsenko@linaro.org Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck --- drivers/watchdog/s3c2410_wdt.c | 56 +++++++++++++++++++++++++--------- 1 file changed, 41 insertions(+), 15 deletions(-) diff --git a/drivers/watchdog/s3c2410_wdt.c b/drivers/watchdog/s3c2410_wdt.c index df67d57ea7e4..924e0c17de76 100644 --- a/drivers/watchdog/s3c2410_wdt.c +++ b/drivers/watchdog/s3c2410_wdt.c @@ -153,7 +153,8 @@ struct s3c2410_wdt_variant { struct s3c2410_wdt { struct device *dev; - struct clk *clock; + struct clk *bus_clk; /* for register interface (PCLK) */ + struct clk *src_clk; /* for WDT counter */ void __iomem *reg_base; unsigned int count; spinlock_t lock; @@ -231,9 +232,14 @@ MODULE_DEVICE_TABLE(platform, s3c2410_wdt_ids); /* functions */ -static inline unsigned int s3c2410wdt_max_timeout(struct clk *clock) +static inline unsigned long s3c2410wdt_get_freq(struct s3c2410_wdt *wdt) { - unsigned long freq = clk_get_rate(clock); + return clk_get_rate(wdt->src_clk ? wdt->src_clk : wdt->bus_clk); +} + +static inline unsigned int s3c2410wdt_max_timeout(struct s3c2410_wdt *wdt) +{ + const unsigned long freq = s3c2410wdt_get_freq(wdt); return S3C2410_WTCNT_MAXCNT / (freq / (S3C2410_WTCON_PRESCALE_MAX + 1) / S3C2410_WTCON_MAXDIV); @@ -383,7 +389,7 @@ static int s3c2410wdt_set_heartbeat(struct watchdog_device *wdd, unsigned int timeout) { struct s3c2410_wdt *wdt = watchdog_get_drvdata(wdd); - unsigned long freq = clk_get_rate(wdt->clock); + unsigned long freq = s3c2410wdt_get_freq(wdt); unsigned int count; unsigned int divisor = 1; unsigned long wtcon; @@ -632,26 +638,42 @@ static int s3c2410wdt_probe(struct platform_device *pdev) goto err; } - wdt->clock = devm_clk_get(dev, "watchdog"); - if (IS_ERR(wdt->clock)) { - dev_err(dev, "failed to find watchdog clock source\n"); - ret = PTR_ERR(wdt->clock); + wdt->bus_clk = devm_clk_get(dev, "watchdog"); + if (IS_ERR(wdt->bus_clk)) { + dev_err(dev, "failed to find bus clock\n"); + ret = PTR_ERR(wdt->bus_clk); goto err; } - ret = clk_prepare_enable(wdt->clock); + ret = clk_prepare_enable(wdt->bus_clk); if (ret < 0) { - dev_err(dev, "failed to enable clock\n"); + dev_err(dev, "failed to enable bus clock\n"); return ret; } + /* + * "watchdog_src" clock is optional; if it's not present -- just skip it + * and use "watchdog" clock as both bus and source clock. + */ + wdt->src_clk = devm_clk_get(dev, "watchdog_src"); + if (!IS_ERR(wdt->src_clk)) { + ret = clk_prepare_enable(wdt->src_clk); + if (ret < 0) { + dev_err(dev, "failed to enable source clock\n"); + ret = PTR_ERR(wdt->src_clk); + goto err_bus_clk; + } + } else { + wdt->src_clk = NULL; + } + wdt->wdt_device.min_timeout = 1; - wdt->wdt_device.max_timeout = s3c2410wdt_max_timeout(wdt->clock); + wdt->wdt_device.max_timeout = s3c2410wdt_max_timeout(wdt); ret = s3c2410wdt_cpufreq_register(wdt); if (ret < 0) { dev_err(dev, "failed to register cpufreq\n"); - goto err_clk; + goto err_src_clk; } watchdog_set_drvdata(&wdt->wdt_device, wdt); @@ -729,8 +751,11 @@ static int s3c2410wdt_probe(struct platform_device *pdev) err_cpufreq: s3c2410wdt_cpufreq_deregister(wdt); - err_clk: - clk_disable_unprepare(wdt->clock); + err_src_clk: + clk_disable_unprepare(wdt->src_clk); + + err_bus_clk: + clk_disable_unprepare(wdt->bus_clk); err: return ret; @@ -749,7 +774,8 @@ static int s3c2410wdt_remove(struct platform_device *dev) s3c2410wdt_cpufreq_deregister(wdt); - clk_disable_unprepare(wdt->clock); + clk_disable_unprepare(wdt->src_clk); + clk_disable_unprepare(wdt->bus_clk); return 0; } From 1a47cda07af4d81a49a140b52220ca56cd6e79a6 Mon Sep 17 00:00:00 2001 From: Sam Protsenko Date: Sun, 21 Nov 2021 18:56:46 +0200 Subject: [PATCH 136/493] watchdog: s3c2410: Remove superfluous err label 'err' label in probe function is not really need, it just returns. Remove it and replace all 'goto' statements with actual returns in place. No functional change here, just a cleanup patch. Signed-off-by: Sam Protsenko Reviewed-by: Krzysztof Kozlowski Reviewed-by: Guenter Roeck Link: https://lore.kernel.org/r/20211107202943.8859-12-semen.protsenko@linaro.org [groeck: Fixed context conflicts] Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck --- drivers/watchdog/s3c2410_wdt.c | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) diff --git a/drivers/watchdog/s3c2410_wdt.c b/drivers/watchdog/s3c2410_wdt.c index 924e0c17de76..62d797a8bfdf 100644 --- a/drivers/watchdog/s3c2410_wdt.c +++ b/drivers/watchdog/s3c2410_wdt.c @@ -627,22 +627,18 @@ static int s3c2410wdt_probe(struct platform_device *pdev) wdt_irq = platform_get_resource(pdev, IORESOURCE_IRQ, 0); if (wdt_irq == NULL) { dev_err(dev, "no irq resource specified\n"); - ret = -ENOENT; - goto err; + return -ENOENT; } /* get the memory region for the watchdog timer */ wdt->reg_base = devm_platform_ioremap_resource(pdev, 0); - if (IS_ERR(wdt->reg_base)) { - ret = PTR_ERR(wdt->reg_base); - goto err; - } + if (IS_ERR(wdt->reg_base)) + return PTR_ERR(wdt->reg_base); wdt->bus_clk = devm_clk_get(dev, "watchdog"); if (IS_ERR(wdt->bus_clk)) { dev_err(dev, "failed to find bus clock\n"); - ret = PTR_ERR(wdt->bus_clk); - goto err; + return PTR_ERR(wdt->bus_clk); } ret = clk_prepare_enable(wdt->bus_clk); @@ -757,7 +753,6 @@ static int s3c2410wdt_probe(struct platform_device *pdev) err_bus_clk: clk_disable_unprepare(wdt->bus_clk); - err: return ret; } From 5c9348157b9dc7a5f526934729bf4e5433b3a5b8 Mon Sep 17 00:00:00 2001 From: Jacky Bai Date: Fri, 26 Nov 2021 15:39:59 +0800 Subject: [PATCH 137/493] dt-bindings: watchdog: imx7ulp-wdt: Add imx8ulp compatible string The wdog on i.MX8ULP is derived from i.MX7ULP, it uses two compatible strings, so update the compatible string for i.MX8ULP. Reviewed-by: Dong Aisheng Acked-by: Rob Herring Signed-off-by: Jacky Bai Signed-off-by: Peng Fan Reviewed-by: Guenter Roeck Link: https://lore.kernel.org/r/20211112082930.3809351-7-peng.fan@oss.nxp.com Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck --- .../devicetree/bindings/watchdog/fsl-imx7ulp-wdt.yaml | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/Documentation/devicetree/bindings/watchdog/fsl-imx7ulp-wdt.yaml b/Documentation/devicetree/bindings/watchdog/fsl-imx7ulp-wdt.yaml index 51d6d482bbc2..fb603a20e396 100644 --- a/Documentation/devicetree/bindings/watchdog/fsl-imx7ulp-wdt.yaml +++ b/Documentation/devicetree/bindings/watchdog/fsl-imx7ulp-wdt.yaml @@ -14,8 +14,11 @@ allOf: properties: compatible: - enum: - - fsl,imx7ulp-wdt + oneOf: + - const: fsl,imx7ulp-wdt + - items: + - const: fsl,imx8ulp-wdt + - const: fsl,imx7ulp-wdt reg: maxItems: 1 From 15ebdc43d703e95e4ec9bae1b5411f1afb07c0b8 Mon Sep 17 00:00:00 2001 From: Luca Ceresoli Date: Sat, 11 Dec 2021 18:59:48 +0100 Subject: [PATCH 138/493] watchdog: Kconfig: fix help text indentation Some entries indent their help text with 1 tab + 1 space or 1 tab only instead of 1 tab + 2 spaces. Add the missing spaces. Signed-off-by: Luca Ceresoli Acked-by: Randy Dunlap Reviewed-by: Guenter Roeck Link: https://lore.kernel.org/r/20211111225852.3128201-7-luca@lucaceresoli.net Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck --- drivers/watchdog/Kconfig | 48 ++++++++++++++++++++-------------------- 1 file changed, 24 insertions(+), 24 deletions(-) diff --git a/drivers/watchdog/Kconfig b/drivers/watchdog/Kconfig index 659064979a97..4d04883335b3 100644 --- a/drivers/watchdog/Kconfig +++ b/drivers/watchdog/Kconfig @@ -680,10 +680,10 @@ config MAX77620_WATCHDOG depends on MFD_MAX77620 || COMPILE_TEST select WATCHDOG_CORE help - This is the driver for the Max77620 watchdog timer. - Say 'Y' here to enable the watchdog timer support for - MAX77620 chips. To compile this driver as a module, - choose M here: the module will be called max77620_wdt. + This is the driver for the Max77620 watchdog timer. + Say 'Y' here to enable the watchdog timer support for + MAX77620 chips. To compile this driver as a module, + choose M here: the module will be called max77620_wdt. config IMX2_WDT tristate "IMX2+ Watchdog" @@ -1441,26 +1441,26 @@ config TQMX86_WDT depends on X86 select WATCHDOG_CORE help - This is the driver for the hardware watchdog timer in the TQMX86 IO - controller found on some of their ComExpress Modules. + This is the driver for the hardware watchdog timer in the TQMX86 IO + controller found on some of their ComExpress Modules. - To compile this driver as a module, choose M here; the module - will be called tqmx86_wdt. + To compile this driver as a module, choose M here; the module + will be called tqmx86_wdt. - Most people will say N. + Most people will say N. config VIA_WDT tristate "VIA Watchdog Timer" depends on X86 && PCI select WATCHDOG_CORE help - This is the driver for the hardware watchdog timer on VIA - southbridge chipset CX700, VX800/VX820 or VX855/VX875. + This is the driver for the hardware watchdog timer on VIA + southbridge chipset CX700, VX800/VX820 or VX855/VX875. - To compile this driver as a module, choose M here; the module - will be called via_wdt. + To compile this driver as a module, choose M here; the module + will be called via_wdt. - Most people will say N. + Most people will say N. config W83627HF_WDT tristate "Watchdog timer for W83627HF/W83627DHG and compatibles" @@ -1746,10 +1746,10 @@ config BCM7038_WDT depends on HAS_IOMEM depends on ARCH_BRCMSTB || BMIPS_GENERIC || COMPILE_TEST help - Watchdog driver for the built-in hardware in Broadcom 7038 and - later SoCs used in set-top boxes. BCM7038 was made public - during the 2004 CES, and since then, many Broadcom chips use this - watchdog block, including some cable modem chips. + Watchdog driver for the built-in hardware in Broadcom 7038 and + later SoCs used in set-top boxes. BCM7038 was made public + during the 2004 CES, and since then, many Broadcom chips use this + watchdog block, including some cable modem chips. config IMGPDC_WDT tristate "Imagination Technologies PDC Watchdog Timer" @@ -2110,12 +2110,12 @@ config KEEMBAY_WATCHDOG depends on ARCH_KEEMBAY || (ARM64 && COMPILE_TEST) select WATCHDOG_CORE help - This option enable support for an In-secure watchdog timer driver for - Intel Keem Bay SoC. This WDT has a 32 bit timer and decrements in every - count unit. An interrupt will be triggered, when the count crosses - the threshold configured in the register. + This option enable support for an In-secure watchdog timer driver for + Intel Keem Bay SoC. This WDT has a 32 bit timer and decrements in every + count unit. An interrupt will be triggered, when the count crosses + the threshold configured in the register. - To compile this driver as a module, choose M here: the - module will be called keembay_wdt. + To compile this driver as a module, choose M here: the + module will be called keembay_wdt. endif # WATCHDOG From aeaacc064d8502b3f3ee662325df8fab24ad9006 Mon Sep 17 00:00:00 2001 From: Artem Lapkin Date: Wed, 10 Nov 2021 10:25:18 +0800 Subject: [PATCH 139/493] watchdog: meson_gxbb_wdt: remove stop_on_reboot Remove watchdog_stop_on_reboot() The Meson platform still has some hardware drivers problems for some configurations which can freeze devices on shutdown/reboot. Remove watchdog_stop_on_reboot() to catch this situation and ensure that the reboot happens anyway. Users who still want to stop the watchdog on reboot can still do so using the watchdog.stop_on_reboot=1 module parameter. https://lore.kernel.org/linux-watchdog/20210729072308.1908904-1-art@khadas.com/T/#t Signed-off-by: Artem Lapkin Reviewed-by: Guenter Roeck Link: https://lore.kernel.org/r/20211110022518.1676834-1-art@khadas.com Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck --- drivers/watchdog/meson_gxbb_wdt.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/watchdog/meson_gxbb_wdt.c b/drivers/watchdog/meson_gxbb_wdt.c index 945f5e65db57..d3c9e2f6e63b 100644 --- a/drivers/watchdog/meson_gxbb_wdt.c +++ b/drivers/watchdog/meson_gxbb_wdt.c @@ -198,7 +198,6 @@ static int meson_gxbb_wdt_probe(struct platform_device *pdev) meson_gxbb_wdt_set_timeout(&data->wdt_dev, data->wdt_dev.timeout); - watchdog_stop_on_reboot(&data->wdt_dev); return devm_watchdog_register_device(dev, &data->wdt_dev); } From 9439c9fde835977467e077cc622ffff95e4c2925 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rafa=C5=82=20Mi=C5=82ecki?= Date: Sat, 13 Nov 2021 12:46:44 +0100 Subject: [PATCH 140/493] dt-bindings: watchdog: convert Broadcom's WDT to the json-schema MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This helps validating DTS files. Signed-off-by: RafaÅ‚ MiÅ‚ecki Acked-by: Florian Fainelli Reviewed-by: Rob Herring Link: https://lore.kernel.org/r/20211115055354.6089-1-zajec5@gmail.com Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck --- .../bindings/watchdog/brcm,bcm7038-wdt.txt | 19 --------- .../bindings/watchdog/brcm,bcm7038-wdt.yaml | 41 +++++++++++++++++++ 2 files changed, 41 insertions(+), 19 deletions(-) delete mode 100644 Documentation/devicetree/bindings/watchdog/brcm,bcm7038-wdt.txt create mode 100644 Documentation/devicetree/bindings/watchdog/brcm,bcm7038-wdt.yaml diff --git a/Documentation/devicetree/bindings/watchdog/brcm,bcm7038-wdt.txt b/Documentation/devicetree/bindings/watchdog/brcm,bcm7038-wdt.txt deleted file mode 100644 index 84122270be8f..000000000000 --- a/Documentation/devicetree/bindings/watchdog/brcm,bcm7038-wdt.txt +++ /dev/null @@ -1,19 +0,0 @@ -BCM7038 Watchdog timer - -Required properties: - -- compatible : should be "brcm,bcm7038-wdt" -- reg : Specifies base physical address and size of the registers. - -Optional properties: - -- clocks: The clock running the watchdog. If no clock is found the - driver will default to 27000000 Hz. - -Example: - -watchdog@f040a7e8 { - compatible = "brcm,bcm7038-wdt"; - clocks = <&upg_fixed>; - reg = <0xf040a7e8 0x16>; -}; diff --git a/Documentation/devicetree/bindings/watchdog/brcm,bcm7038-wdt.yaml b/Documentation/devicetree/bindings/watchdog/brcm,bcm7038-wdt.yaml new file mode 100644 index 000000000000..ed6210666ead --- /dev/null +++ b/Documentation/devicetree/bindings/watchdog/brcm,bcm7038-wdt.yaml @@ -0,0 +1,41 @@ +# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/watchdog/brcm,bcm7038-wdt.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: BCM7038 watchdog timer + +allOf: + - $ref: "watchdog.yaml#" + +maintainers: + - Florian Fainelli + - Justin Chen + - RafaÅ‚ MiÅ‚ecki + +properties: + compatible: + const: brcm,bcm7038-wdt + + reg: + maxItems: 1 + + clocks: + maxItems: 1 + description: > + The clock running the watchdog. If no clock is found the driver will + default to 27000000 Hz. + +unevaluatedProperties: false + +required: + - reg + +examples: + - | + watchdog@f040a7e8 { + compatible = "brcm,bcm7038-wdt"; + reg = <0xf040a7e8 0x16>; + clocks = <&upg_fixed>; + }; From 17fffe91ba36d11c7b17be154ecc7c1ed31527eb Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Fri, 12 Nov 2021 14:46:31 -0800 Subject: [PATCH 141/493] dt-bindings: watchdog: Add BCM6345 compatible to BCM7038 binding The BCM7038 watchdog binding is updated to include a "brcm,bcm6345-wdt" compatible string which is the first instance of a DSL (BCM63xx) SoC seeing the integration of such a watchdog timer block. Reviewed-by: Rob Herring Signed-off-by: Florian Fainelli Reviewed-by: Guenter Roeck Link: https://lore.kernel.org/r/20211112224636.395101-3-f.fainelli@gmail.com Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck --- .../devicetree/bindings/watchdog/brcm,bcm7038-wdt.yaml | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/Documentation/devicetree/bindings/watchdog/brcm,bcm7038-wdt.yaml b/Documentation/devicetree/bindings/watchdog/brcm,bcm7038-wdt.yaml index ed6210666ead..a926809352b8 100644 --- a/Documentation/devicetree/bindings/watchdog/brcm,bcm7038-wdt.yaml +++ b/Documentation/devicetree/bindings/watchdog/brcm,bcm7038-wdt.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/watchdog/brcm,bcm7038-wdt.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: BCM7038 watchdog timer +title: BCM63xx and BCM7038 watchdog timer allOf: - $ref: "watchdog.yaml#" @@ -16,7 +16,9 @@ maintainers: properties: compatible: - const: brcm,bcm7038-wdt + enum: + - brcm,bcm6345-wdt + - brcm,bcm7038-wdt reg: maxItems: 1 From d6b9c679bbac1d1d2fcac64391b4cadb91763a6f Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Fri, 12 Nov 2021 14:46:32 -0800 Subject: [PATCH 142/493] watchdog: bcm7038_wdt: Support platform data configuration The BCM7038 watchdog driver needs to be able to obtain a specific clock name on BCM63xx platforms which is the "periph" clock ticking at 50MHz. make it possible to specify the clock name to obtain via platform data. Signed-off-by: Florian Fainelli Reviewed-by: Guenter Roeck Link: https://lore.kernel.org/r/20211112224636.395101-4-f.fainelli@gmail.com Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck --- drivers/watchdog/bcm7038_wdt.c | 8 +++++++- include/linux/platform_data/bcm7038_wdt.h | 8 ++++++++ 2 files changed, 15 insertions(+), 1 deletion(-) create mode 100644 include/linux/platform_data/bcm7038_wdt.h diff --git a/drivers/watchdog/bcm7038_wdt.c b/drivers/watchdog/bcm7038_wdt.c index acaaa0005d5b..506cd7ef9c77 100644 --- a/drivers/watchdog/bcm7038_wdt.c +++ b/drivers/watchdog/bcm7038_wdt.c @@ -10,6 +10,7 @@ #include #include #include +#include #include #include @@ -133,8 +134,10 @@ static void bcm7038_clk_disable_unprepare(void *data) static int bcm7038_wdt_probe(struct platform_device *pdev) { + struct bcm7038_wdt_platform_data *pdata = pdev->dev.platform_data; struct device *dev = &pdev->dev; struct bcm7038_watchdog *wdt; + const char *clk_name = NULL; int err; wdt = devm_kzalloc(dev, sizeof(*wdt), GFP_KERNEL); @@ -147,7 +150,10 @@ static int bcm7038_wdt_probe(struct platform_device *pdev) if (IS_ERR(wdt->base)) return PTR_ERR(wdt->base); - wdt->clk = devm_clk_get(dev, NULL); + if (pdata && pdata->clk_name) + clk_name = pdata->clk_name; + + wdt->clk = devm_clk_get(dev, clk_name); /* If unable to get clock, use default frequency */ if (!IS_ERR(wdt->clk)) { err = clk_prepare_enable(wdt->clk); diff --git a/include/linux/platform_data/bcm7038_wdt.h b/include/linux/platform_data/bcm7038_wdt.h new file mode 100644 index 000000000000..e18cfd9ec8f9 --- /dev/null +++ b/include/linux/platform_data/bcm7038_wdt.h @@ -0,0 +1,8 @@ +#ifndef __BCM7038_WDT_PDATA_H +#define __BCM7038_WDT_PDATA_H + +struct bcm7038_wdt_platform_data { + const char *clk_name; +}; + +#endif /* __BCM7038_WDT_PDATA_H */ From bc0bf9e9ac3ba49d1c7ab267a58204b525ac054b Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Fri, 12 Nov 2021 14:46:33 -0800 Subject: [PATCH 143/493] watchdog: Allow building BCM7038_WDT for BCM63XX CONFIG_BCM63XX denotes the legacy MIPS-based DSL SoCs which utilize the same piece of hardware as a watchdog, make it possible to select that driver for those platforms. Signed-off-by: Florian Fainelli Reviewed-by: Guenter Roeck Link: https://lore.kernel.org/r/20211112224636.395101-5-f.fainelli@gmail.com Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck --- drivers/watchdog/Kconfig | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/watchdog/Kconfig b/drivers/watchdog/Kconfig index 4d04883335b3..64ff93c06d02 100644 --- a/drivers/watchdog/Kconfig +++ b/drivers/watchdog/Kconfig @@ -1741,15 +1741,16 @@ config BCM_KONA_WDT_DEBUG If in doubt, say 'N'. config BCM7038_WDT - tristate "BCM7038 Watchdog" + tristate "BCM63xx/BCM7038 Watchdog" select WATCHDOG_CORE depends on HAS_IOMEM - depends on ARCH_BRCMSTB || BMIPS_GENERIC || COMPILE_TEST + depends on ARCH_BRCMSTB || BMIPS_GENERIC || BCM63XX || COMPILE_TEST help Watchdog driver for the built-in hardware in Broadcom 7038 and later SoCs used in set-top boxes. BCM7038 was made public during the 2004 CES, and since then, many Broadcom chips use this - watchdog block, including some cable modem chips. + watchdog block, including some cable modem chips and DSL (63xx) + chips. config IMGPDC_WDT tristate "Imagination Technologies PDC Watchdog Timer" From e764faef774b931994d31a856d786734691ab26e Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Fri, 12 Nov 2021 14:46:34 -0800 Subject: [PATCH 144/493] watchdog: bcm7038_wdt: Add platform device id for bcm63xx-wdt In order to phase out bcm63xx_wdt and use bcm7038_wdt instead, introduce a platform_device_id table that allows both names to be matched. Signed-off-by: Florian Fainelli Reviewed-by: Guenter Roeck Link: https://lore.kernel.org/r/20211112224636.395101-6-f.fainelli@gmail.com Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck --- drivers/watchdog/bcm7038_wdt.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/watchdog/bcm7038_wdt.c b/drivers/watchdog/bcm7038_wdt.c index 506cd7ef9c77..8656a137e9a4 100644 --- a/drivers/watchdog/bcm7038_wdt.c +++ b/drivers/watchdog/bcm7038_wdt.c @@ -223,8 +223,15 @@ static const struct of_device_id bcm7038_wdt_match[] = { }; MODULE_DEVICE_TABLE(of, bcm7038_wdt_match); +static const struct platform_device_id bcm7038_wdt_devtype[] = { + { .name = "bcm63xx-wdt" }, + { /* sentinel */ }, +}; +MODULE_DEVICE_TABLE(platform, bcm7038_wdt_devtype); + static struct platform_driver bcm7038_wdt_driver = { .probe = bcm7038_wdt_probe, + .id_table = bcm7038_wdt_devtype, .driver = { .name = "bcm7038-wdt", .of_match_table = bcm7038_wdt_match, From b844f9181b4a1014d501a26dc25b39f363626b8c Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Fri, 12 Nov 2021 14:46:35 -0800 Subject: [PATCH 145/493] MIPS: BCM63XX: Provide platform data to watchdog device In order to utilize the bcm7038_wdt.c driver which needs to know the clock name to obtain, pass it via platform data using the bcm7038_wdt_platform_data structure. Signed-off-by: Florian Fainelli Acked-by: Thomas Bogendoerfer Reviewed-by: Guenter Roeck Link: https://lore.kernel.org/r/20211112224636.395101-7-f.fainelli@gmail.com Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck --- arch/mips/bcm63xx/dev-wdt.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/arch/mips/bcm63xx/dev-wdt.c b/arch/mips/bcm63xx/dev-wdt.c index 2a2346a99bcb..42130914a3c2 100644 --- a/arch/mips/bcm63xx/dev-wdt.c +++ b/arch/mips/bcm63xx/dev-wdt.c @@ -9,6 +9,7 @@ #include #include #include +#include #include static struct resource wdt_resources[] = { @@ -19,11 +20,18 @@ static struct resource wdt_resources[] = { }, }; +static struct bcm7038_wdt_platform_data bcm63xx_wdt_pdata = { + .clk_name = "periph", +}; + static struct platform_device bcm63xx_wdt_device = { .name = "bcm63xx-wdt", .id = -1, .num_resources = ARRAY_SIZE(wdt_resources), .resource = wdt_resources, + .dev = { + .platform_data = &bcm63xx_wdt_pdata, + }, }; int __init bcm63xx_wdt_register(void) From f8d9ba7fedd2a5c21759ce3f39a37663c895d3dd Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Fri, 12 Nov 2021 14:46:36 -0800 Subject: [PATCH 146/493] watchdog: Remove BCM63XX_WDT Now that we can utilize the BCM7038_WDT driver, remove that one which was not converted to the watchdog APIs. There are a couple of notable differences with how the bcm7038_wdt driver proceeds: - bcm63xx_wdt would register with the ad-hoc BCM63xx hardware timer API, but this would only be used in order to catch the interrupt *before* a SoC reset and make the kernel "die" - bcm6xx_wdt would register a software timer and kick it every second in order to pet the watchdog, thus offering a two step watchdog process. This is not something that is brought over to the bcm7038_wdt as it is deemed unnecessary. If user-space cannot pet the watchdog, but a kernel timer can, the system is still in a bad shape anyway. bcm7038_wdt is simpler in its behavior and behaves as a standard watchdog driver and is not making use of any specific platform APIs, therefore making it more maintainable and extensible. Signed-off-by: Florian Fainelli Reviewed-by: Guenter Roeck Link: https://lore.kernel.org/r/20211112224636.395101-8-f.fainelli@gmail.com Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck --- drivers/watchdog/Kconfig | 10 -- drivers/watchdog/Makefile | 1 - drivers/watchdog/bcm63xx_wdt.c | 317 --------------------------------- 3 files changed, 328 deletions(-) delete mode 100644 drivers/watchdog/bcm63xx_wdt.c diff --git a/drivers/watchdog/Kconfig b/drivers/watchdog/Kconfig index 64ff93c06d02..73084e008c2b 100644 --- a/drivers/watchdog/Kconfig +++ b/drivers/watchdog/Kconfig @@ -1697,16 +1697,6 @@ config OCTEON_WDT from the first interrupt, it is then only poked when the device is written. -config BCM63XX_WDT - tristate "Broadcom BCM63xx hardware watchdog" - depends on BCM63XX - help - Watchdog driver for the built in watchdog hardware in Broadcom - BCM63xx SoC. - - To compile this driver as a loadable module, choose M here. - The module will be called bcm63xx_wdt. - config BCM2835_WDT tristate "Broadcom BCM2835 hardware watchdog" depends on ARCH_BCM2835 || (OF && COMPILE_TEST) diff --git a/drivers/watchdog/Makefile b/drivers/watchdog/Makefile index 2ee97064145b..b01007c0396c 100644 --- a/drivers/watchdog/Makefile +++ b/drivers/watchdog/Makefile @@ -153,7 +153,6 @@ obj-$(CONFIG_XILINX_WATCHDOG) += of_xilinx_wdt.o # MIPS Architecture obj-$(CONFIG_ATH79_WDT) += ath79_wdt.o obj-$(CONFIG_BCM47XX_WDT) += bcm47xx_wdt.o -obj-$(CONFIG_BCM63XX_WDT) += bcm63xx_wdt.o obj-$(CONFIG_RC32434_WDT) += rc32434_wdt.o obj-$(CONFIG_INDYDOG) += indydog.o obj-$(CONFIG_JZ4740_WDT) += jz4740_wdt.o diff --git a/drivers/watchdog/bcm63xx_wdt.c b/drivers/watchdog/bcm63xx_wdt.c deleted file mode 100644 index 56cc262571a5..000000000000 --- a/drivers/watchdog/bcm63xx_wdt.c +++ /dev/null @@ -1,317 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0+ -/* - * Broadcom BCM63xx SoC watchdog driver - * - * Copyright (C) 2007, Miguel Gaio - * Copyright (C) 2008, Florian Fainelli - * - */ - -#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include - -#define PFX KBUILD_MODNAME - -#define WDT_HZ 50000000 /* Fclk */ -#define WDT_DEFAULT_TIME 30 /* seconds */ -#define WDT_MAX_TIME 256 /* seconds */ - -static struct { - void __iomem *regs; - struct timer_list timer; - unsigned long inuse; - atomic_t ticks; -} bcm63xx_wdt_device; - -static int expect_close; - -static int wdt_time = WDT_DEFAULT_TIME; -static bool nowayout = WATCHDOG_NOWAYOUT; -module_param(nowayout, bool, 0); -MODULE_PARM_DESC(nowayout, "Watchdog cannot be stopped once started (default=" - __MODULE_STRING(WATCHDOG_NOWAYOUT) ")"); - -/* HW functions */ -static void bcm63xx_wdt_hw_start(void) -{ - bcm_writel(0xfffffffe, bcm63xx_wdt_device.regs + WDT_DEFVAL_REG); - bcm_writel(WDT_START_1, bcm63xx_wdt_device.regs + WDT_CTL_REG); - bcm_writel(WDT_START_2, bcm63xx_wdt_device.regs + WDT_CTL_REG); -} - -static void bcm63xx_wdt_hw_stop(void) -{ - bcm_writel(WDT_STOP_1, bcm63xx_wdt_device.regs + WDT_CTL_REG); - bcm_writel(WDT_STOP_2, bcm63xx_wdt_device.regs + WDT_CTL_REG); -} - -static void bcm63xx_wdt_isr(void *data) -{ - struct pt_regs *regs = get_irq_regs(); - - die(PFX " fire", regs); -} - -static void bcm63xx_timer_tick(struct timer_list *unused) -{ - if (!atomic_dec_and_test(&bcm63xx_wdt_device.ticks)) { - bcm63xx_wdt_hw_start(); - mod_timer(&bcm63xx_wdt_device.timer, jiffies + HZ); - } else - pr_crit("watchdog will restart system\n"); -} - -static void bcm63xx_wdt_pet(void) -{ - atomic_set(&bcm63xx_wdt_device.ticks, wdt_time); -} - -static void bcm63xx_wdt_start(void) -{ - bcm63xx_wdt_pet(); - bcm63xx_timer_tick(0); -} - -static void bcm63xx_wdt_pause(void) -{ - del_timer_sync(&bcm63xx_wdt_device.timer); - bcm63xx_wdt_hw_stop(); -} - -static int bcm63xx_wdt_settimeout(int new_time) -{ - if ((new_time <= 0) || (new_time > WDT_MAX_TIME)) - return -EINVAL; - - wdt_time = new_time; - - return 0; -} - -static int bcm63xx_wdt_open(struct inode *inode, struct file *file) -{ - if (test_and_set_bit(0, &bcm63xx_wdt_device.inuse)) - return -EBUSY; - - bcm63xx_wdt_start(); - return stream_open(inode, file); -} - -static int bcm63xx_wdt_release(struct inode *inode, struct file *file) -{ - if (expect_close == 42) - bcm63xx_wdt_pause(); - else { - pr_crit("Unexpected close, not stopping watchdog!\n"); - bcm63xx_wdt_start(); - } - clear_bit(0, &bcm63xx_wdt_device.inuse); - expect_close = 0; - return 0; -} - -static ssize_t bcm63xx_wdt_write(struct file *file, const char *data, - size_t len, loff_t *ppos) -{ - if (len) { - if (!nowayout) { - size_t i; - - /* In case it was set long ago */ - expect_close = 0; - - for (i = 0; i != len; i++) { - char c; - if (get_user(c, data + i)) - return -EFAULT; - if (c == 'V') - expect_close = 42; - } - } - bcm63xx_wdt_pet(); - } - return len; -} - -static struct watchdog_info bcm63xx_wdt_info = { - .identity = PFX, - .options = WDIOF_SETTIMEOUT | - WDIOF_KEEPALIVEPING | - WDIOF_MAGICCLOSE, -}; - - -static long bcm63xx_wdt_ioctl(struct file *file, unsigned int cmd, - unsigned long arg) -{ - void __user *argp = (void __user *)arg; - int __user *p = argp; - int new_value, retval = -EINVAL; - - switch (cmd) { - case WDIOC_GETSUPPORT: - return copy_to_user(argp, &bcm63xx_wdt_info, - sizeof(bcm63xx_wdt_info)) ? -EFAULT : 0; - - case WDIOC_GETSTATUS: - case WDIOC_GETBOOTSTATUS: - return put_user(0, p); - - case WDIOC_SETOPTIONS: - if (get_user(new_value, p)) - return -EFAULT; - - if (new_value & WDIOS_DISABLECARD) { - bcm63xx_wdt_pause(); - retval = 0; - } - if (new_value & WDIOS_ENABLECARD) { - bcm63xx_wdt_start(); - retval = 0; - } - - return retval; - - case WDIOC_KEEPALIVE: - bcm63xx_wdt_pet(); - return 0; - - case WDIOC_SETTIMEOUT: - if (get_user(new_value, p)) - return -EFAULT; - - if (bcm63xx_wdt_settimeout(new_value)) - return -EINVAL; - - bcm63xx_wdt_pet(); - - fallthrough; - - case WDIOC_GETTIMEOUT: - return put_user(wdt_time, p); - - default: - return -ENOTTY; - - } -} - -static const struct file_operations bcm63xx_wdt_fops = { - .owner = THIS_MODULE, - .llseek = no_llseek, - .write = bcm63xx_wdt_write, - .unlocked_ioctl = bcm63xx_wdt_ioctl, - .compat_ioctl = compat_ptr_ioctl, - .open = bcm63xx_wdt_open, - .release = bcm63xx_wdt_release, -}; - -static struct miscdevice bcm63xx_wdt_miscdev = { - .minor = WATCHDOG_MINOR, - .name = "watchdog", - .fops = &bcm63xx_wdt_fops, -}; - - -static int bcm63xx_wdt_probe(struct platform_device *pdev) -{ - int ret; - struct resource *r; - - timer_setup(&bcm63xx_wdt_device.timer, bcm63xx_timer_tick, 0); - - r = platform_get_resource(pdev, IORESOURCE_MEM, 0); - if (!r) { - dev_err(&pdev->dev, "failed to get resources\n"); - return -ENODEV; - } - - bcm63xx_wdt_device.regs = devm_ioremap(&pdev->dev, r->start, - resource_size(r)); - if (!bcm63xx_wdt_device.regs) { - dev_err(&pdev->dev, "failed to remap I/O resources\n"); - return -ENXIO; - } - - ret = bcm63xx_timer_register(TIMER_WDT_ID, bcm63xx_wdt_isr, NULL); - if (ret < 0) { - dev_err(&pdev->dev, "failed to register wdt timer isr\n"); - return ret; - } - - if (bcm63xx_wdt_settimeout(wdt_time)) { - bcm63xx_wdt_settimeout(WDT_DEFAULT_TIME); - dev_info(&pdev->dev, - ": wdt_time value must be 1 <= wdt_time <= 256, using %d\n", - wdt_time); - } - - ret = misc_register(&bcm63xx_wdt_miscdev); - if (ret < 0) { - dev_err(&pdev->dev, "failed to register watchdog device\n"); - goto unregister_timer; - } - - dev_info(&pdev->dev, " started, timer margin: %d sec\n", - WDT_DEFAULT_TIME); - - return 0; - -unregister_timer: - bcm63xx_timer_unregister(TIMER_WDT_ID); - return ret; -} - -static int bcm63xx_wdt_remove(struct platform_device *pdev) -{ - if (!nowayout) - bcm63xx_wdt_pause(); - - misc_deregister(&bcm63xx_wdt_miscdev); - bcm63xx_timer_unregister(TIMER_WDT_ID); - return 0; -} - -static void bcm63xx_wdt_shutdown(struct platform_device *pdev) -{ - bcm63xx_wdt_pause(); -} - -static struct platform_driver bcm63xx_wdt_driver = { - .probe = bcm63xx_wdt_probe, - .remove = bcm63xx_wdt_remove, - .shutdown = bcm63xx_wdt_shutdown, - .driver = { - .name = "bcm63xx-wdt", - } -}; - -module_platform_driver(bcm63xx_wdt_driver); - -MODULE_AUTHOR("Miguel Gaio "); -MODULE_AUTHOR("Florian Fainelli "); -MODULE_DESCRIPTION("Driver for the Broadcom BCM63xx SoC watchdog"); -MODULE_LICENSE("GPL"); -MODULE_ALIAS("platform:bcm63xx-wdt"); From 1fc8a2c021c3abb8083ef4d9b6d4c93f88f33dc7 Mon Sep 17 00:00:00 2001 From: Changcheng Deng Date: Thu, 25 Nov 2021 01:49:24 +0000 Subject: [PATCH 147/493] watchdog: davinci: Use div64_ul instead of do_div do_div() does a 64-by-32 division. Here the divisor is an unsigned long which on some platforms is 64 bit wide. So use div64_ul instead of do_div to avoid a possible truncation. Reported-by: Zeal Robot Signed-off-by: Changcheng Deng Reviewed-by: Guenter Roeck Link: https://lore.kernel.org/r/20211125014924.46297-1-deng.changcheng@zte.com.cn Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck --- drivers/watchdog/davinci_wdt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/watchdog/davinci_wdt.c b/drivers/watchdog/davinci_wdt.c index e6eaba6bae5b..584a56893b81 100644 --- a/drivers/watchdog/davinci_wdt.c +++ b/drivers/watchdog/davinci_wdt.c @@ -134,7 +134,7 @@ static unsigned int davinci_wdt_get_timeleft(struct watchdog_device *wdd) timer_counter = ioread32(davinci_wdt->base + TIM12); timer_counter |= ((u64)ioread32(davinci_wdt->base + TIM34) << 32); - do_div(timer_counter, freq); + timer_counter = div64_ul(timer_counter, freq); return wdd->timeout - timer_counter; } From 968011a291f3c80afe9446968d21422569f1bc1c Mon Sep 17 00:00:00 2001 From: Yunus Bas Date: Wed, 24 Nov 2021 09:06:54 +0100 Subject: [PATCH 148/493] watchdog: da9063: use atomic safe i2c transfer in reset handler This patch is based on commit 057b52b4b3d5 ("watchdog: da9062: make restart handler atomic safe"), which uses the atomic transfer capability of the i2c framework. Signed-off-by: Yunus Bas Signed-off-by: Andrej Picej Reviewed-by: Adam Thomson Reviewed-by: Guenter Roeck Link: https://lore.kernel.org/r/20211124080654.2601135-1-andrej.picej@norik.com Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck --- drivers/watchdog/da9063_wdt.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/drivers/watchdog/da9063_wdt.c b/drivers/watchdog/da9063_wdt.c index d79ce64e26a9..9adad1862bbd 100644 --- a/drivers/watchdog/da9063_wdt.c +++ b/drivers/watchdog/da9063_wdt.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include @@ -169,14 +170,19 @@ static int da9063_wdt_restart(struct watchdog_device *wdd, unsigned long action, void *data) { struct da9063 *da9063 = watchdog_get_drvdata(wdd); + struct i2c_client *client = to_i2c_client(da9063->dev); int ret; - ret = regmap_write(da9063->regmap, DA9063_REG_CONTROL_F, - DA9063_SHUTDOWN); - if (ret) + /* Don't use regmap because it is not atomic safe */ + ret = i2c_smbus_write_byte_data(client, DA9063_REG_CONTROL_F, + DA9063_SHUTDOWN); + if (ret < 0) dev_alert(da9063->dev, "Failed to shutdown (err = %d)\n", ret); + /* wait for reset to assert... */ + mdelay(500); + return ret; } From cd4eadf228dbfc6e81a2730a1fa635e9a593a449 Mon Sep 17 00:00:00 2001 From: Sam Protsenko Date: Sun, 21 Nov 2021 18:56:47 +0200 Subject: [PATCH 149/493] watchdog: s3c2410: Add Exynos850 support Exynos850 is a bit different from SoCs already supported in WDT driver: - AUTOMATIC_WDT_RESET_DISABLE register is removed, so its value is always 0; .disable_auto_reset callback is not set for that reason - MASK_WDT_RESET_REQUEST register is replaced with CLUSTERx_NONCPU_IN_EN register; instead of masking (disabling) WDT reset interrupt it's now enabled with the same value; .mask_reset callback is reused for that functionality though - To make WDT functional, WDT counter needs to be enabled in CLUSTERx_NONCPU_OUT register; it's done using .enable_counter callback Also Exynos850 has two CPU clusters, each has its own dedicated WDT instance. Different PMU registers and bits are used for each cluster. So driver data is now modified in probe, adding needed info depending on cluster index passed from device tree. Signed-off-by: Sam Protsenko Reviewed-by: Krzysztof Kozlowski Reviewed-by: Guenter Roeck Link: https://lore.kernel.org/r/20211121165647.26706-13-semen.protsenko@linaro.org Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck --- drivers/watchdog/s3c2410_wdt.c | 64 +++++++++++++++++++++++++++++++++- 1 file changed, 63 insertions(+), 1 deletion(-) diff --git a/drivers/watchdog/s3c2410_wdt.c b/drivers/watchdog/s3c2410_wdt.c index 62d797a8bfdf..bb374b9fc163 100644 --- a/drivers/watchdog/s3c2410_wdt.c +++ b/drivers/watchdog/s3c2410_wdt.c @@ -56,6 +56,13 @@ #define EXYNOS5_RST_STAT_REG_OFFSET 0x0404 #define EXYNOS5_WDT_DISABLE_REG_OFFSET 0x0408 #define EXYNOS5_WDT_MASK_RESET_REG_OFFSET 0x040c +#define EXYNOS850_CLUSTER0_NONCPU_OUT 0x1220 +#define EXYNOS850_CLUSTER0_NONCPU_INT_EN 0x1244 +#define EXYNOS850_CLUSTER1_NONCPU_OUT 0x1620 +#define EXYNOS850_CLUSTER1_NONCPU_INT_EN 0x1644 + +#define EXYNOS850_CLUSTER0_WDTRESET_BIT 24 +#define EXYNOS850_CLUSTER1_WDTRESET_BIT 23 /** * DOC: Quirk flags for different Samsung watchdog IP-cores @@ -205,6 +212,30 @@ static const struct s3c2410_wdt_variant drv_data_exynos7 = { QUIRK_HAS_PMU_RST_STAT | QUIRK_HAS_PMU_AUTO_DISABLE, }; +static const struct s3c2410_wdt_variant drv_data_exynos850_cl0 = { + .mask_reset_reg = EXYNOS850_CLUSTER0_NONCPU_INT_EN, + .mask_bit = 2, + .mask_reset_inv = true, + .rst_stat_reg = EXYNOS5_RST_STAT_REG_OFFSET, + .rst_stat_bit = EXYNOS850_CLUSTER0_WDTRESET_BIT, + .cnt_en_reg = EXYNOS850_CLUSTER0_NONCPU_OUT, + .cnt_en_bit = 7, + .quirks = QUIRK_HAS_WTCLRINT_REG | QUIRK_HAS_PMU_MASK_RESET | \ + QUIRK_HAS_PMU_RST_STAT | QUIRK_HAS_PMU_CNT_EN, +}; + +static const struct s3c2410_wdt_variant drv_data_exynos850_cl1 = { + .mask_reset_reg = EXYNOS850_CLUSTER1_NONCPU_INT_EN, + .mask_bit = 2, + .mask_reset_inv = true, + .rst_stat_reg = EXYNOS5_RST_STAT_REG_OFFSET, + .rst_stat_bit = EXYNOS850_CLUSTER1_WDTRESET_BIT, + .cnt_en_reg = EXYNOS850_CLUSTER1_NONCPU_OUT, + .cnt_en_bit = 7, + .quirks = QUIRK_HAS_WTCLRINT_REG | QUIRK_HAS_PMU_MASK_RESET | \ + QUIRK_HAS_PMU_RST_STAT | QUIRK_HAS_PMU_CNT_EN, +}; + static const struct of_device_id s3c2410_wdt_match[] = { { .compatible = "samsung,s3c2410-wdt", .data = &drv_data_s3c2410 }, @@ -216,6 +247,8 @@ static const struct of_device_id s3c2410_wdt_match[] = { .data = &drv_data_exynos5420 }, { .compatible = "samsung,exynos7-wdt", .data = &drv_data_exynos7 }, + { .compatible = "samsung,exynos850-wdt", + .data = &drv_data_exynos850_cl0 }, {}, }; MODULE_DEVICE_TABLE(of, s3c2410_wdt_match); @@ -587,14 +620,40 @@ static inline const struct s3c2410_wdt_variant * s3c2410_get_wdt_drv_data(struct platform_device *pdev) { const struct s3c2410_wdt_variant *variant; + struct device *dev = &pdev->dev; - variant = of_device_get_match_data(&pdev->dev); + variant = of_device_get_match_data(dev); if (!variant) { /* Device matched by platform_device_id */ variant = (struct s3c2410_wdt_variant *) platform_get_device_id(pdev)->driver_data; } +#ifdef CONFIG_OF + /* Choose Exynos850 driver data w.r.t. cluster index */ + if (variant == &drv_data_exynos850_cl0) { + u32 index; + int err; + + err = of_property_read_u32(dev->of_node, + "samsung,cluster-index", &index); + if (err) { + dev_err(dev, "failed to get cluster index\n"); + return NULL; + } + + switch (index) { + case 0: + return &drv_data_exynos850_cl0; + case 1: + return &drv_data_exynos850_cl1; + default: + dev_err(dev, "wrong cluster index: %u\n", index); + return NULL; + } + } +#endif + return variant; } @@ -615,6 +674,9 @@ static int s3c2410wdt_probe(struct platform_device *pdev) wdt->wdt_device = s3c2410_wdd; wdt->drv_data = s3c2410_get_wdt_drv_data(pdev); + if (!wdt->drv_data) + return -EINVAL; + if (wdt->drv_data->quirks & QUIRKS_HAVE_PMUREG) { wdt->pmureg = syscon_regmap_lookup_by_phandle(dev->of_node, "samsung,syscon-phandle"); From 753150ada5e9c43ffa4cd3de552923656e962519 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Fri, 29 Oct 2021 17:31:34 -0700 Subject: [PATCH 150/493] ARC: thread_info.h: correct two typos in a comment Fix typos of "separately" and "remains". Signed-off-by: Randy Dunlap Suggested-by: Matthew Wilcox # "remains" Cc: Vineet Gupta Cc: linux-snps-arc@lists.infradead.org Signed-off-by: Vineet Gupta --- arch/arc/include/asm/thread_info.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arc/include/asm/thread_info.h b/arch/arc/include/asm/thread_info.h index c0942c24d401..d36863e34bfc 100644 --- a/arch/arc/include/asm/thread_info.h +++ b/arch/arc/include/asm/thread_info.h @@ -99,8 +99,8 @@ static inline __attribute_const__ struct thread_info *current_thread_info(void) /* * _TIF_ALLWORK_MASK includes SYSCALL_TRACE, but we don't need it. - * SYSCALL_TRACE is anyway seperately/unconditionally tested right after a - * syscall, so all that reamins to be tested is _TIF_WORK_MASK + * SYSCALL_TRACE is anyway separately/unconditionally tested right after a + * syscall, so all that remains to be tested is _TIF_WORK_MASK */ #endif /* _ASM_THREAD_INFO_H */ From e296c2e1cd70b2dd9b3c294c37ed4e14833b3f31 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Fri, 26 Nov 2021 22:23:12 +0000 Subject: [PATCH 151/493] ARC: perf: Remove redundant initialization of variable idx The variable idx is being initialized with a value that is never read, it is being updated later on. The assignment is redundant and can be removed. Reviewed-by: Vladimir Isaev Signed-off-by: Colin Ian King Signed-off-by: Vineet Gupta --- arch/arc/kernel/perf_event.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arc/kernel/perf_event.c b/arch/arc/kernel/perf_event.c index 145722f80c9b..f5dd799ddb9e 100644 --- a/arch/arc/kernel/perf_event.c +++ b/arch/arc/kernel/perf_event.c @@ -361,7 +361,7 @@ static int arc_pmu_add(struct perf_event *event, int flags) { struct arc_pmu_cpu *pmu_cpu = this_cpu_ptr(&arc_pmu_cpu); struct hw_perf_event *hwc = &event->hw; - int idx = hwc->idx; + int idx; idx = ffz(pmu_cpu->used_mask[0]); if (idx == arc_pmu->n_counters) From 7e5b06b8c1f8f58fa7b7bcec3ccf490ae6f0810e Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Wed, 10 Feb 2021 15:28:49 -0800 Subject: [PATCH 152/493] arc: Replace lkml.org links with lore As started by commit 05a5f51ca566 ("Documentation: Replace lkml.org links with lore"), replace lkml.org links with lore to better use a single source that's more likely to stay available long-term. Signed-off-by: Kees Cook Signed-off-by: Vineet Gupta --- arch/arc/include/asm/irqflags-compact.h | 8 ++++++-- arch/arc/mm/dma.c | 2 +- arch/arc/plat-axs10x/axs10x.c | 2 +- arch/arc/plat-hsdk/platform.c | 2 +- 4 files changed, 9 insertions(+), 5 deletions(-) diff --git a/arch/arc/include/asm/irqflags-compact.h b/arch/arc/include/asm/irqflags-compact.h index 863d63ad18d6..0d63e568d64c 100644 --- a/arch/arc/include/asm/irqflags-compact.h +++ b/arch/arc/include/asm/irqflags-compact.h @@ -50,8 +50,12 @@ * are redone after IRQs are re-enabled (and gcc doesn't reuse stale register) * * Noted at the time of Abilis Timer List corruption - * Orig Bug + Rejected solution : https://lkml.org/lkml/2013/3/29/67 - * Reasoning : https://lkml.org/lkml/2013/4/8/15 + * + * Orig Bug + Rejected solution: + * https://lore.kernel.org/lkml/1364553218-31255-1-git-send-email-vgupta@synopsys.com + * + * Reasoning: + * https://lore.kernel.org/lkml/CA+55aFyFWjpSVQM6M266tKrG_ZXJzZ-nYejpmXYQXbrr42mGPQ@mail.gmail.com * ******************************************************************/ diff --git a/arch/arc/mm/dma.c b/arch/arc/mm/dma.c index 517988e60cfc..2a7fbbb83b70 100644 --- a/arch/arc/mm/dma.c +++ b/arch/arc/mm/dma.c @@ -32,7 +32,7 @@ void arch_dma_prep_coherent(struct page *page, size_t size) /* * Cache operations depending on function and direction argument, inspired by - * https://lkml.org/lkml/2018/5/18/979 + * https://lore.kernel.org/lkml/20180518175004.GF17671@n2100.armlinux.org.uk * "dma_sync_*_for_cpu and direction=TO_DEVICE (was Re: [PATCH 02/20] * dma-mapping: provide a generic dma-noncoherent implementation)" * diff --git a/arch/arc/plat-axs10x/axs10x.c b/arch/arc/plat-axs10x/axs10x.c index 63ea5a606ecd..b821df7b0089 100644 --- a/arch/arc/plat-axs10x/axs10x.c +++ b/arch/arc/plat-axs10x/axs10x.c @@ -50,7 +50,7 @@ static void __init axs10x_enable_gpio_intc_wire(void) * Current implementation of "irq-dw-apb-ictl" driver doesn't work well * with stacked INTCs. In particular problem happens if its master INTC * not yet instantiated. See discussion here - - * https://lkml.org/lkml/2015/3/4/755 + * https://lore.kernel.org/lkml/54F6FE2C.7020309@synopsys.com * * So setup the first gpio block as a passive pass thru and hide it from * DT hardware topology - connect MB intc directly to cpu intc diff --git a/arch/arc/plat-hsdk/platform.c b/arch/arc/plat-hsdk/platform.c index b3ea1fa11f87..c4a875b22352 100644 --- a/arch/arc/plat-hsdk/platform.c +++ b/arch/arc/plat-hsdk/platform.c @@ -52,7 +52,7 @@ static void __init hsdk_enable_gpio_intc_wire(void) * Current implementation of "irq-dw-apb-ictl" driver doesn't work well * with stacked INTCs. In particular problem happens if its master INTC * not yet instantiated. See discussion here - - * https://lkml.org/lkml/2015/3/4/755 + * https://lore.kernel.org/lkml/54F6FE2C.7020309@synopsys.com * * So setup the first gpio block as a passive pass thru and hide it from * DT hardware topology - connect intc directly to cpu intc From 1b2a62becacef79c2340ba39f662cfc313b72fb6 Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Thu, 16 Dec 2021 13:33:45 -0800 Subject: [PATCH 153/493] ARC: perf: fix misleading comment about pmu vs counter stop Signed-off-by: Vineet Gupta --- arch/arc/kernel/perf_event.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arc/kernel/perf_event.c b/arch/arc/kernel/perf_event.c index f5dd799ddb9e..35e5fe708840 100644 --- a/arch/arc/kernel/perf_event.c +++ b/arch/arc/kernel/perf_event.c @@ -328,7 +328,7 @@ static void arc_pmu_stop(struct perf_event *event, int flags) } if (!(event->hw.state & PERF_HES_STOPPED)) { - /* stop ARC pmu here */ + /* stop hw counter here */ write_aux_reg(ARC_REG_PCT_INDEX, idx); /* condition code #0 is always "never" */ From ca295ffb9102c8cf619e2a38d5383bf7c08ceb62 Mon Sep 17 00:00:00 2001 From: Alexey Brodkin Date: Mon, 29 Nov 2021 12:37:07 -0800 Subject: [PATCH 154/493] arc: perf: Move static structs to where they're really used It is all well described by Stephen Rothwell who initially spotted that: ----------------------------->8---------------------------- After merging the origin tree, today's linux-next build (arc haps_hs_smp_defconfig+kselftest) produced these warnings: arch/arc/include/asm/perf_event.h:126:27: warning: 'arc_pmu_cache_map' defined but not used [-Wunused-const-variable=] arch/arc/include/asm/perf_event.h:91:27: warning: 'arc_pmu_ev_hw_map' defined but not used [-Wunused-const-variable=] Introduced by commit 0dd450fe13da ("ARC: Add perf support for ARC700 cores") The 2 static arrays should be moved into arch/arc/kernel/perf_event.c (the only place that uses them). We get the warning because perf_event.h is also included by arch/arc/kernel/unaligned.c. ----------------------------->8---------------------------- Could be easily reproduced by running make with "W=1" on any up-to-date sources, when extra warnings get enabled (in particular "-Wunused-const-variable"), otherwise disabled by default in the top-level Makefile as "These warnings generated too much noise in a regular build". Cc: Mischa Jonker Reported-by: Stephen Rothwell Signed-off-by: Alexey Brodkin Signed-off-by: Vineet Gupta --- arch/arc/include/asm/perf_event.h | 162 ------------------------------ arch/arc/kernel/perf_event.c | 162 ++++++++++++++++++++++++++++++ 2 files changed, 162 insertions(+), 162 deletions(-) diff --git a/arch/arc/include/asm/perf_event.h b/arch/arc/include/asm/perf_event.h index e1971d34ef30..4c919c0f4b30 100644 --- a/arch/arc/include/asm/perf_event.h +++ b/arch/arc/include/asm/perf_event.h @@ -63,166 +63,4 @@ struct arc_reg_cc_build { #define PERF_COUNT_ARC_HW_MAX (PERF_COUNT_HW_MAX + 8) -/* - * Some ARC pct quirks: - * - * PERF_COUNT_HW_STALLED_CYCLES_BACKEND - * PERF_COUNT_HW_STALLED_CYCLES_FRONTEND - * The ARC 700 can either measure stalls per pipeline stage, or all stalls - * combined; for now we assign all stalls to STALLED_CYCLES_BACKEND - * and all pipeline flushes (e.g. caused by mispredicts, etc.) to - * STALLED_CYCLES_FRONTEND. - * - * We could start multiple performance counters and combine everything - * afterwards, but that makes it complicated. - * - * Note that I$ cache misses aren't counted by either of the two! - */ - -/* - * ARC PCT has hardware conditions with fixed "names" but variable "indexes" - * (based on a specific RTL build) - * Below is the static map between perf generic/arc specific event_id and - * h/w condition names. - * At the time of probe, we loop thru each index and find it's name to - * complete the mapping of perf event_id to h/w index as latter is needed - * to program the counter really - */ -static const char * const arc_pmu_ev_hw_map[] = { - /* count cycles */ - [PERF_COUNT_HW_CPU_CYCLES] = "crun", - [PERF_COUNT_HW_REF_CPU_CYCLES] = "crun", - [PERF_COUNT_HW_BUS_CYCLES] = "crun", - - [PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = "bflush", - [PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = "bstall", - - /* counts condition */ - [PERF_COUNT_HW_INSTRUCTIONS] = "iall", - /* All jump instructions that are taken */ - [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = "ijmptak", -#ifdef CONFIG_ISA_ARCV2 - [PERF_COUNT_HW_BRANCH_MISSES] = "bpmp", -#else - [PERF_COUNT_ARC_BPOK] = "bpok", /* NP-NT, PT-T, PNT-NT */ - [PERF_COUNT_HW_BRANCH_MISSES] = "bpfail", /* NP-T, PT-NT, PNT-T */ -#endif - [PERF_COUNT_ARC_LDC] = "imemrdc", /* Instr: mem read cached */ - [PERF_COUNT_ARC_STC] = "imemwrc", /* Instr: mem write cached */ - - [PERF_COUNT_ARC_DCLM] = "dclm", /* D-cache Load Miss */ - [PERF_COUNT_ARC_DCSM] = "dcsm", /* D-cache Store Miss */ - [PERF_COUNT_ARC_ICM] = "icm", /* I-cache Miss */ - [PERF_COUNT_ARC_EDTLB] = "edtlb", /* D-TLB Miss */ - [PERF_COUNT_ARC_EITLB] = "eitlb", /* I-TLB Miss */ - - [PERF_COUNT_HW_CACHE_REFERENCES] = "imemrdc", /* Instr: mem read cached */ - [PERF_COUNT_HW_CACHE_MISSES] = "dclm", /* D-cache Load Miss */ -}; - -#define C(_x) PERF_COUNT_HW_CACHE_##_x -#define CACHE_OP_UNSUPPORTED 0xffff - -static const unsigned int arc_pmu_cache_map[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = { - [C(L1D)] = { - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = PERF_COUNT_ARC_LDC, - [C(RESULT_MISS)] = PERF_COUNT_ARC_DCLM, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = PERF_COUNT_ARC_STC, - [C(RESULT_MISS)] = PERF_COUNT_ARC_DCSM, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, - [C(L1I)] = { - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = PERF_COUNT_HW_INSTRUCTIONS, - [C(RESULT_MISS)] = PERF_COUNT_ARC_ICM, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, - [C(LL)] = { - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, - [C(DTLB)] = { - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = PERF_COUNT_ARC_LDC, - [C(RESULT_MISS)] = PERF_COUNT_ARC_EDTLB, - }, - /* DTLB LD/ST Miss not segregated by h/w*/ - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, - [C(ITLB)] = { - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = PERF_COUNT_ARC_EITLB, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, - [C(BPU)] = { - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = PERF_COUNT_HW_BRANCH_INSTRUCTIONS, - [C(RESULT_MISS)] = PERF_COUNT_HW_BRANCH_MISSES, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, - [C(NODE)] = { - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, -}; - #endif /* __ASM_PERF_EVENT_H */ diff --git a/arch/arc/kernel/perf_event.c b/arch/arc/kernel/perf_event.c index 35e5fe708840..adff957962da 100644 --- a/arch/arc/kernel/perf_event.c +++ b/arch/arc/kernel/perf_event.c @@ -17,6 +17,168 @@ /* HW holds 8 symbols + one for null terminator */ #define ARCPMU_EVENT_NAME_LEN 9 +/* + * Some ARC pct quirks: + * + * PERF_COUNT_HW_STALLED_CYCLES_BACKEND + * PERF_COUNT_HW_STALLED_CYCLES_FRONTEND + * The ARC 700 can either measure stalls per pipeline stage, or all stalls + * combined; for now we assign all stalls to STALLED_CYCLES_BACKEND + * and all pipeline flushes (e.g. caused by mispredicts, etc.) to + * STALLED_CYCLES_FRONTEND. + * + * We could start multiple performance counters and combine everything + * afterwards, but that makes it complicated. + * + * Note that I$ cache misses aren't counted by either of the two! + */ + +/* + * ARC PCT has hardware conditions with fixed "names" but variable "indexes" + * (based on a specific RTL build) + * Below is the static map between perf generic/arc specific event_id and + * h/w condition names. + * At the time of probe, we loop thru each index and find it's name to + * complete the mapping of perf event_id to h/w index as latter is needed + * to program the counter really + */ +static const char * const arc_pmu_ev_hw_map[] = { + /* count cycles */ + [PERF_COUNT_HW_CPU_CYCLES] = "crun", + [PERF_COUNT_HW_REF_CPU_CYCLES] = "crun", + [PERF_COUNT_HW_BUS_CYCLES] = "crun", + + [PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = "bflush", + [PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = "bstall", + + /* counts condition */ + [PERF_COUNT_HW_INSTRUCTIONS] = "iall", + /* All jump instructions that are taken */ + [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = "ijmptak", +#ifdef CONFIG_ISA_ARCV2 + [PERF_COUNT_HW_BRANCH_MISSES] = "bpmp", +#else + [PERF_COUNT_ARC_BPOK] = "bpok", /* NP-NT, PT-T, PNT-NT */ + [PERF_COUNT_HW_BRANCH_MISSES] = "bpfail", /* NP-T, PT-NT, PNT-T */ +#endif + [PERF_COUNT_ARC_LDC] = "imemrdc", /* Instr: mem read cached */ + [PERF_COUNT_ARC_STC] = "imemwrc", /* Instr: mem write cached */ + + [PERF_COUNT_ARC_DCLM] = "dclm", /* D-cache Load Miss */ + [PERF_COUNT_ARC_DCSM] = "dcsm", /* D-cache Store Miss */ + [PERF_COUNT_ARC_ICM] = "icm", /* I-cache Miss */ + [PERF_COUNT_ARC_EDTLB] = "edtlb", /* D-TLB Miss */ + [PERF_COUNT_ARC_EITLB] = "eitlb", /* I-TLB Miss */ + + [PERF_COUNT_HW_CACHE_REFERENCES] = "imemrdc", /* Instr: mem read cached */ + [PERF_COUNT_HW_CACHE_MISSES] = "dclm", /* D-cache Load Miss */ +}; + +#define C(_x) PERF_COUNT_HW_CACHE_##_x +#define CACHE_OP_UNSUPPORTED 0xffff + +static const unsigned int arc_pmu_cache_map[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = { + [C(L1D)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = PERF_COUNT_ARC_LDC, + [C(RESULT_MISS)] = PERF_COUNT_ARC_DCLM, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = PERF_COUNT_ARC_STC, + [C(RESULT_MISS)] = PERF_COUNT_ARC_DCSM, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + }, + [C(L1I)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = PERF_COUNT_HW_INSTRUCTIONS, + [C(RESULT_MISS)] = PERF_COUNT_ARC_ICM, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + }, + [C(LL)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + }, + [C(DTLB)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = PERF_COUNT_ARC_LDC, + [C(RESULT_MISS)] = PERF_COUNT_ARC_EDTLB, + }, + /* DTLB LD/ST Miss not segregated by h/w*/ + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + }, + [C(ITLB)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = PERF_COUNT_ARC_EITLB, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + }, + [C(BPU)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = PERF_COUNT_HW_BRANCH_INSTRUCTIONS, + [C(RESULT_MISS)] = PERF_COUNT_HW_BRANCH_MISSES, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + }, + [C(NODE)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + }, +}; + enum arc_pmu_attr_groups { ARCPMU_ATTR_GR_EVENTS, ARCPMU_ATTR_GR_FORMATS, From 8f67f65d121cc3bbb4ffaae80e880aeb307d49f4 Mon Sep 17 00:00:00 2001 From: Yihao Han Date: Tue, 9 Nov 2021 19:07:12 -0800 Subject: [PATCH 155/493] arc: use swap() to make code cleaner Use the macro 'swap()' defined in 'include/linux/minmax.h' to avoid opencoding it. Signed-off-by: Yihao Han Signed-off-by: Vineet Gupta --- arch/arc/kernel/unwind.c | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/arch/arc/kernel/unwind.c b/arch/arc/kernel/unwind.c index 9e28058cdba8..200270a94558 100644 --- a/arch/arc/kernel/unwind.c +++ b/arch/arc/kernel/unwind.c @@ -245,14 +245,9 @@ static void swap_eh_frame_hdr_table_entries(void *p1, void *p2, int size) { struct eh_frame_hdr_table_entry *e1 = p1; struct eh_frame_hdr_table_entry *e2 = p2; - unsigned long v; - v = e1->start; - e1->start = e2->start; - e2->start = v; - v = e1->fde; - e1->fde = e2->fde; - e2->fde = v; + swap(e1->start, e2->start); + swap(e1->fde, e2->fde); } static void init_unwind_hdr(struct unwind_table *table, From 063e458c7aafc694f2491de7f8f10ff470263d8d Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Tue, 28 Dec 2021 15:44:07 +0100 Subject: [PATCH 156/493] orangefs: use default_groups in kobj_type There are currently 2 ways to create a set of sysfs files for a kobj_type, through the default_attrs field, and the default_groups field. Move the orangfs code to use default_groups field which has been the preferred way since aa30f47cf666 ("kobject: Add support for default attribute groups to kobj_type") so that we can soon get rid of the obsolete default_attrs field. Cc: Mike Marshall Cc: Martin Brandenburg Cc: devel@lists.orangefs.org Signed-off-by: Greg Kroah-Hartman Signed-off-by: Mike Marshall --- fs/orangefs/orangefs-sysfs.c | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/fs/orangefs/orangefs-sysfs.c b/fs/orangefs/orangefs-sysfs.c index 3627ea946402..de80b62553bb 100644 --- a/fs/orangefs/orangefs-sysfs.c +++ b/fs/orangefs/orangefs-sysfs.c @@ -894,10 +894,11 @@ static struct attribute *orangefs_default_attrs[] = { &perf_time_interval_secs_attribute.attr, NULL, }; +ATTRIBUTE_GROUPS(orangefs_default); static struct kobj_type orangefs_ktype = { .sysfs_ops = &orangefs_sysfs_ops, - .default_attrs = orangefs_default_attrs, + .default_groups = orangefs_default_groups, }; static struct orangefs_attribute acache_hard_limit_attribute = @@ -931,10 +932,11 @@ static struct attribute *acache_orangefs_default_attrs[] = { &acache_timeout_msecs_attribute.attr, NULL, }; +ATTRIBUTE_GROUPS(acache_orangefs_default); static struct kobj_type acache_orangefs_ktype = { .sysfs_ops = &orangefs_sysfs_ops, - .default_attrs = acache_orangefs_default_attrs, + .default_groups = acache_orangefs_default_groups, }; static struct orangefs_attribute capcache_hard_limit_attribute = @@ -968,10 +970,11 @@ static struct attribute *capcache_orangefs_default_attrs[] = { &capcache_timeout_secs_attribute.attr, NULL, }; +ATTRIBUTE_GROUPS(capcache_orangefs_default); static struct kobj_type capcache_orangefs_ktype = { .sysfs_ops = &orangefs_sysfs_ops, - .default_attrs = capcache_orangefs_default_attrs, + .default_groups = capcache_orangefs_default_groups, }; static struct orangefs_attribute ccache_hard_limit_attribute = @@ -1005,10 +1008,11 @@ static struct attribute *ccache_orangefs_default_attrs[] = { &ccache_timeout_secs_attribute.attr, NULL, }; +ATTRIBUTE_GROUPS(ccache_orangefs_default); static struct kobj_type ccache_orangefs_ktype = { .sysfs_ops = &orangefs_sysfs_ops, - .default_attrs = ccache_orangefs_default_attrs, + .default_groups = ccache_orangefs_default_groups, }; static struct orangefs_attribute ncache_hard_limit_attribute = @@ -1042,10 +1046,11 @@ static struct attribute *ncache_orangefs_default_attrs[] = { &ncache_timeout_msecs_attribute.attr, NULL, }; +ATTRIBUTE_GROUPS(ncache_orangefs_default); static struct kobj_type ncache_orangefs_ktype = { .sysfs_ops = &orangefs_sysfs_ops, - .default_attrs = ncache_orangefs_default_attrs, + .default_groups = ncache_orangefs_default_groups, }; static struct orangefs_attribute pc_acache_attribute = @@ -1072,10 +1077,11 @@ static struct attribute *pc_orangefs_default_attrs[] = { &pc_ncache_attribute.attr, NULL, }; +ATTRIBUTE_GROUPS(pc_orangefs_default); static struct kobj_type pc_orangefs_ktype = { .sysfs_ops = &orangefs_sysfs_ops, - .default_attrs = pc_orangefs_default_attrs, + .default_groups = pc_orangefs_default_groups, }; static struct orangefs_attribute stats_reads_attribute = @@ -1095,10 +1101,11 @@ static struct attribute *stats_orangefs_default_attrs[] = { &stats_writes_attribute.attr, NULL, }; +ATTRIBUTE_GROUPS(stats_orangefs_default); static struct kobj_type stats_orangefs_ktype = { .sysfs_ops = &orangefs_sysfs_ops, - .default_attrs = stats_orangefs_default_attrs, + .default_groups = stats_orangefs_default_groups, }; static struct kobject *orangefs_obj; From 40a74870b2d1d3d44e13b3b73c6571dd34f5614d Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Mon, 27 Dec 2021 19:09:18 +0100 Subject: [PATCH 157/493] orangefs: Fix the size of a memory allocation in orangefs_bufmap_alloc() 'buffer_index_array' really looks like a bitmap. So it should be allocated as such. When kzalloc is called, a number of bytes is expected, but a number of longs is passed instead. In get(), if not enough memory is allocated, un-allocated memory may be read or written. So use bitmap_zalloc() to safely allocate the correct memory size and avoid un-expected behavior. While at it, change the corresponding kfree() into bitmap_free() to keep the semantic. Fixes: ea2c9c9f6574 ("orangefs: bufmap rewrite") Signed-off-by: Christophe JAILLET Signed-off-by: Mike Marshall --- fs/orangefs/orangefs-bufmap.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/fs/orangefs/orangefs-bufmap.c b/fs/orangefs/orangefs-bufmap.c index 538e839590ef..b501dc07f922 100644 --- a/fs/orangefs/orangefs-bufmap.c +++ b/fs/orangefs/orangefs-bufmap.c @@ -176,7 +176,7 @@ orangefs_bufmap_free(struct orangefs_bufmap *bufmap) { kfree(bufmap->page_array); kfree(bufmap->desc_array); - kfree(bufmap->buffer_index_array); + bitmap_free(bufmap->buffer_index_array); kfree(bufmap); } @@ -226,8 +226,7 @@ orangefs_bufmap_alloc(struct ORANGEFS_dev_map_desc *user_desc) bufmap->desc_size = user_desc->size; bufmap->desc_shift = ilog2(bufmap->desc_size); - bufmap->buffer_index_array = - kzalloc(DIV_ROUND_UP(bufmap->desc_count, BITS_PER_LONG), GFP_KERNEL); + bufmap->buffer_index_array = bitmap_zalloc(bufmap->desc_count, GFP_KERNEL); if (!bufmap->buffer_index_array) goto out_free_bufmap; @@ -250,7 +249,7 @@ orangefs_bufmap_alloc(struct ORANGEFS_dev_map_desc *user_desc) out_free_desc_array: kfree(bufmap->desc_array); out_free_index_array: - kfree(bufmap->buffer_index_array); + bitmap_free(bufmap->buffer_index_array); out_free_bufmap: kfree(bufmap); out: From 0b66fa776c361824a700793e34f866bf479dac92 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Tue, 21 Dec 2021 00:48:09 +0000 Subject: [PATCH 158/493] cifs: remove redundant assignment to pointer p The pointer p is being assigned with a value that is never read. The pointer is being re-assigned a different value inside the do-while loop. The assignment is redundant and can be removed. Signed-off-by: Colin Ian King Signed-off-by: Steve French --- fs/cifs/cifsfs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index dca42aa87d30..a62a4305f79d 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c @@ -773,7 +773,7 @@ cifs_get_root(struct smb3_fs_context *ctx, struct super_block *sb) sep = CIFS_DIR_SEP(cifs_sb); dentry = dget(sb->s_root); - p = s = full_path; + s = full_path; do { struct inode *dir = d_inode(dentry); From d1a931ce2e3b7761d293ba8e0bde2b0180f456e9 Mon Sep 17 00:00:00 2001 From: Shyam Prasad N Date: Mon, 19 Jul 2021 12:46:53 +0000 Subject: [PATCH 159/493] cifs: track individual channel status using chans_need_reconnect We needed a way to identify the channels under the smb session which are in reconnect, so that the traffic to other channels can continue. So I replaced the bool need_reconnect with a bitmask identifying all the channels that need reconnection (named chans_need_reconnect). When a channel needs reconnection, the bit corresponding to the index of the server in ses->chans is used to set this bitmask. Checking if no channels or all the channels need reconnect then becomes very easy. Also wrote some helper macros for checking and setting the bits. Signed-off-by: Shyam Prasad N Signed-off-by: Steve French --- fs/cifs/cifsglob.h | 15 +++++++++- fs/cifs/cifsproto.h | 13 +++++++++ fs/cifs/cifssmb.c | 48 ++++++++++++++++++++++++++++---- fs/cifs/connect.c | 33 ++++++++++++++++++---- fs/cifs/sess.c | 67 +++++++++++++++++++++++++++++++++++++++++++-- fs/cifs/smb2pdu.c | 59 +++++++++++++++++++++++++++++++++------ 6 files changed, 211 insertions(+), 24 deletions(-) diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index be74606724c7..91878e84c637 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h @@ -939,7 +939,6 @@ struct cifs_ses { struct ntlmssp_auth *ntlmssp; /* ciphertext, flags, server challenge */ enum securityEnum sectype; /* what security flavor was specified? */ bool sign; /* is signing required? */ - bool need_reconnect:1; /* connection reset, uid now invalid */ bool domainAuto:1; bool binding:1; /* are we binding the session? */ __u16 session_flags; @@ -969,11 +968,25 @@ struct cifs_ses { spinlock_t chan_lock; /* ========= begin: protected by chan_lock ======== */ #define CIFS_MAX_CHANNELS 16 +#define CIFS_ALL_CHANNELS_SET(ses) \ + ((1UL << (ses)->chan_count) - 1) +#define CIFS_ALL_CHANS_NEED_RECONNECT(ses) \ + ((ses)->chans_need_reconnect == CIFS_ALL_CHANNELS_SET(ses)) +#define CIFS_CHAN_NEEDS_RECONNECT(ses, index) \ + test_bit((index), &(ses)->chans_need_reconnect) + struct cifs_chan chans[CIFS_MAX_CHANNELS]; struct cifs_chan *binding_chan; size_t chan_count; size_t chan_max; atomic_t chan_seq; /* round robin state */ + + /* + * chans_need_reconnect is a bitmap indicating which of the channels + * under this smb session needs to be reconnected. + * If not multichannel session, only one bit will be used. + */ + unsigned long chans_need_reconnect; /* ========= end: protected by chan_lock ======== */ }; diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h index 4f5a3e857df4..2a821a8801d2 100644 --- a/fs/cifs/cifsproto.h +++ b/fs/cifs/cifsproto.h @@ -601,6 +601,19 @@ bool is_server_using_iface(struct TCP_Server_Info *server, bool is_ses_using_iface(struct cifs_ses *ses, struct cifs_server_iface *iface); void cifs_ses_mark_for_reconnect(struct cifs_ses *ses); +unsigned int +cifs_ses_get_chan_index(struct cifs_ses *ses, + struct TCP_Server_Info *server); +void +cifs_chan_set_need_reconnect(struct cifs_ses *ses, + struct TCP_Server_Info *server); +void +cifs_chan_clear_need_reconnect(struct cifs_ses *ses, + struct TCP_Server_Info *server); +bool +cifs_chan_needs_reconnect(struct cifs_ses *ses, + struct TCP_Server_Info *server); + void extract_unc_hostname(const char *unc, const char **h, size_t *len); int copy_path_name(char *dst, const char *src); int smb2_parse_query_directory(struct cifs_tcon *tcon, struct kvec *rsp_iov, diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c index 243d17696f06..cbb0d55c1267 100644 --- a/fs/cifs/cifssmb.c +++ b/fs/cifs/cifssmb.c @@ -166,8 +166,12 @@ cifs_reconnect_tcon(struct cifs_tcon *tcon, int smb_command) retries = server->nr_targets; } - if (!ses->need_reconnect && !tcon->need_reconnect) + spin_lock(&ses->chan_lock); + if (!cifs_chan_needs_reconnect(ses, server) && !tcon->need_reconnect) { + spin_unlock(&ses->chan_lock); return 0; + } + spin_unlock(&ses->chan_lock); nls_codepage = load_nls_default(); @@ -188,8 +192,25 @@ cifs_reconnect_tcon(struct cifs_tcon *tcon, int smb_command) goto out; } + /* + * need to prevent multiple threads trying to simultaneously + * reconnect the same SMB session + */ + spin_lock(&ses->chan_lock); + if (!cifs_chan_needs_reconnect(ses, server)) { + spin_unlock(&ses->chan_lock); + /* this just means that we only need to tcon */ + if (tcon->need_reconnect) + goto skip_sess_setup; + + rc = -EHOSTDOWN; + mutex_unlock(&ses->session_mutex); + goto out; + } + spin_unlock(&ses->chan_lock); + rc = cifs_negotiate_protocol(0, ses); - if (rc == 0 && ses->need_reconnect) + if (!rc) rc = cifs_setup_session(0, ses, nls_codepage); /* do we need to reconnect tcon? */ @@ -198,6 +219,7 @@ cifs_reconnect_tcon(struct cifs_tcon *tcon, int smb_command) goto out; } +skip_sess_setup: cifs_mark_open_files_invalid(tcon); rc = cifs_tree_connect(0, tcon, nls_codepage); mutex_unlock(&ses->session_mutex); @@ -337,8 +359,13 @@ static int smb_init_no_reconnect(int smb_command, int wct, struct cifs_tcon *tcon, void **request_buf, void **response_buf) { - if (tcon->ses->need_reconnect || tcon->need_reconnect) + spin_lock(&tcon->ses->chan_lock); + if (cifs_chan_needs_reconnect(tcon->ses, tcon->ses->server) || + tcon->need_reconnect) { + spin_unlock(&tcon->ses->chan_lock); return -EHOSTDOWN; + } + spin_unlock(&tcon->ses->chan_lock); return __smb_init(smb_command, wct, tcon, request_buf, response_buf); } @@ -600,8 +627,12 @@ CIFSSMBTDis(const unsigned int xid, struct cifs_tcon *tcon) * the tcon is no longer on the list, so no need to take lock before * checking this. */ - if ((tcon->need_reconnect) || (tcon->ses->need_reconnect)) - return 0; + spin_lock(&tcon->ses->chan_lock); + if ((tcon->need_reconnect) || CIFS_ALL_CHANS_NEED_RECONNECT(tcon->ses)) { + spin_unlock(&tcon->ses->chan_lock); + return -EIO; + } + spin_unlock(&tcon->ses->chan_lock); rc = small_smb_init(SMB_COM_TREE_DISCONNECT, 0, tcon, (void **)&smb_buffer); @@ -696,9 +727,14 @@ CIFSSMBLogoff(const unsigned int xid, struct cifs_ses *ses) return -EIO; mutex_lock(&ses->session_mutex); - if (ses->need_reconnect) + spin_lock(&ses->chan_lock); + if (CIFS_ALL_CHANS_NEED_RECONNECT(ses)) { + spin_unlock(&ses->chan_lock); goto session_already_dead; /* no need to send SMBlogoff if uid already closed due to reconnect */ + } + spin_unlock(&ses->chan_lock); + rc = small_smb_init(SMB_COM_LOGOFF_ANDX, 2, NULL, (void **)&pSMB); if (rc) { mutex_unlock(&ses->session_mutex); diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 1060164b984a..fa80a23f9fcf 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -191,11 +191,23 @@ static void cifs_mark_tcp_ses_conns_for_reconnect(struct TCP_Server_Info *server spin_lock(&cifs_tcp_ses_lock); list_for_each_entry(ses, &pserver->smb_ses_list, smb_ses_list) { - ses->need_reconnect = true; + spin_lock(&ses->chan_lock); + if (cifs_chan_needs_reconnect(ses, server)) + goto next_session; + + cifs_chan_set_need_reconnect(ses, server); + + /* If all channels need reconnect, then tcon needs reconnect */ + if (!CIFS_ALL_CHANS_NEED_RECONNECT(ses)) + goto next_session; + list_for_each_entry(tcon, &ses->tcon_list, tcon_list) tcon->need_reconnect = true; if (ses->tcon_ipc) ses->tcon_ipc->need_reconnect = true; + +next_session: + spin_unlock(&ses->chan_lock); } spin_unlock(&cifs_tcp_ses_lock); @@ -1988,7 +2000,9 @@ cifs_get_smb_ses(struct TCP_Server_Info *server, struct smb3_fs_context *ctx) ses->status); mutex_lock(&ses->session_mutex); - if (ses->need_reconnect) { + spin_lock(&ses->chan_lock); + if (cifs_chan_needs_reconnect(ses, server)) { + spin_unlock(&ses->chan_lock); cifs_dbg(FYI, "Session needs reconnect\n"); rc = cifs_negotiate_protocol(xid, ses); @@ -2009,7 +2023,9 @@ cifs_get_smb_ses(struct TCP_Server_Info *server, struct smb3_fs_context *ctx) free_xid(xid); return ERR_PTR(rc); } + spin_lock(&ses->chan_lock); } + spin_unlock(&ses->chan_lock); mutex_unlock(&ses->session_mutex); /* existing SMB ses has a server reference already */ @@ -2067,6 +2083,7 @@ cifs_get_smb_ses(struct TCP_Server_Info *server, struct smb3_fs_context *ctx) ses->chans[0].server = server; ses->chan_count = 1; ses->chan_max = ctx->multichannel ? ctx->max_channels:1; + ses->chans_need_reconnect = 1; spin_unlock(&ses->chan_lock); rc = cifs_negotiate_protocol(xid, ses); @@ -2081,7 +2098,11 @@ cifs_get_smb_ses(struct TCP_Server_Info *server, struct smb3_fs_context *ctx) if (rc) goto get_ses_fail; - /* success, put it on the list and add it as first channel */ + /* + * success, put it on the list and add it as first channel + * note: the session becomes active soon after this. So you'll + * need to lock before changing something in the session. + */ spin_lock(&cifs_tcp_ses_lock); list_add(&ses->smb_ses_list, &server->smb_ses_list); spin_unlock(&cifs_tcp_ses_lock); @@ -2161,6 +2182,9 @@ cifs_put_tcon(struct cifs_tcon *tcon) /* tc_count can never go negative */ WARN_ON(tcon->tc_count < 0); + list_del_init(&tcon->tcon_list); + spin_unlock(&cifs_tcp_ses_lock); + if (tcon->use_witness) { int rc; @@ -2171,9 +2195,6 @@ cifs_put_tcon(struct cifs_tcon *tcon) } } - list_del_init(&tcon->tcon_list); - spin_unlock(&cifs_tcp_ses_lock); - xid = get_xid(); if (ses->server->ops->tree_disconnect) ses->server->ops->tree_disconnect(xid, tcon); diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c index 035dc3e245dc..ba8543ccb298 100644 --- a/fs/cifs/sess.c +++ b/fs/cifs/sess.c @@ -65,6 +65,53 @@ bool is_ses_using_iface(struct cifs_ses *ses, struct cifs_server_iface *iface) return false; } +unsigned int +cifs_ses_get_chan_index(struct cifs_ses *ses, + struct TCP_Server_Info *server) +{ + unsigned int i; + + for (i = 0; i < ses->chan_count; i++) { + if (ses->chans[i].server == server) + return i; + } + + /* If we didn't find the channel, it is likely a bug */ + WARN_ON(1); + return 0; +} + +void +cifs_chan_set_need_reconnect(struct cifs_ses *ses, + struct TCP_Server_Info *server) +{ + unsigned int chan_index = cifs_ses_get_chan_index(ses, server); + + set_bit(chan_index, &ses->chans_need_reconnect); + cifs_dbg(FYI, "Set reconnect bitmask for chan %u; now 0x%lx\n", + chan_index, ses->chans_need_reconnect); +} + +void +cifs_chan_clear_need_reconnect(struct cifs_ses *ses, + struct TCP_Server_Info *server) +{ + unsigned int chan_index = cifs_ses_get_chan_index(ses, server); + + clear_bit(chan_index, &ses->chans_need_reconnect); + cifs_dbg(FYI, "Cleared reconnect bitmask for chan %u; now 0x%lx\n", + chan_index, ses->chans_need_reconnect); +} + +bool +cifs_chan_needs_reconnect(struct cifs_ses *ses, + struct TCP_Server_Info *server) +{ + unsigned int chan_index = cifs_ses_get_chan_index(ses, server); + + return CIFS_CHAN_NEEDS_RECONNECT(ses, chan_index); +} + /* returns number of channels added */ int cifs_try_adding_channels(struct cifs_sb_info *cifs_sb, struct cifs_ses *ses) { @@ -306,11 +353,21 @@ cifs_ses_add_channel(struct cifs_sb_info *cifs_sb, struct cifs_ses *ses, spin_lock(&ses->chan_lock); ses->chan_count++; atomic_set(&ses->chan_seq, 0); + + /* Mark this channel as needing connect/setup */ + cifs_chan_set_need_reconnect(ses, chan->server); spin_unlock(&ses->chan_lock); out: ses->binding = false; ses->binding_chan = NULL; + + if (rc && chan->server) { + /* we rely on all bits beyond chan_count to be clear */ + cifs_chan_clear_need_reconnect(ses, chan->server); + ses->chan_count--; + } + mutex_unlock(&ses->session_mutex); if (rc && chan->server) @@ -998,9 +1055,15 @@ sess_establish_session(struct sess_data *sess_data) mutex_unlock(&ses->server->srv_mutex); cifs_dbg(FYI, "CIFS session established successfully\n"); + if (ses->binding) + cifs_chan_clear_need_reconnect(ses, ses->binding_chan->server); + else + cifs_chan_clear_need_reconnect(ses, ses->server); + + /* keep existing ses state if binding */ spin_lock(&GlobalMid_Lock); - ses->status = CifsGood; - ses->need_reconnect = false; + if (!ses->binding) + ses->status = CifsGood; spin_unlock(&GlobalMid_Lock); return 0; diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index 8b3670388cda..f3b2eef36ec1 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -232,8 +232,15 @@ smb2_reconnect(__le16 smb2_command, struct cifs_tcon *tcon, retries = server->nr_targets; } - if (!tcon->ses->need_reconnect && !tcon->need_reconnect) + spin_lock(&ses->chan_lock); + if (!cifs_chan_needs_reconnect(ses, server) && !tcon->need_reconnect) { + spin_unlock(&ses->chan_lock); return 0; + } + cifs_dbg(FYI, "sess reconnect mask: 0x%lx, tcon reconnect: %d", + tcon->ses->chans_need_reconnect, + tcon->need_reconnect); + spin_unlock(&ses->chan_lock); nls_codepage = load_nls_default(); @@ -262,8 +269,26 @@ smb2_reconnect(__le16 smb2_command, struct cifs_tcon *tcon, ses->binding_chan = cifs_ses_find_chan(ses, server); } + /* + * need to prevent multiple threads trying to simultaneously + * reconnect the same SMB session + */ + spin_lock(&ses->chan_lock); + if (!cifs_chan_needs_reconnect(ses, server)) { + spin_unlock(&ses->chan_lock); + + /* this just means that we only need to tcon */ + if (tcon->need_reconnect) + goto skip_sess_setup; + + rc = -EHOSTDOWN; + mutex_unlock(&ses->session_mutex); + goto out; + } + spin_unlock(&ses->chan_lock); + rc = cifs_negotiate_protocol(0, tcon->ses); - if (!rc && tcon->ses->need_reconnect) { + if (!rc) { rc = cifs_setup_session(0, tcon->ses, nls_codepage); if ((rc == -EACCES) && !tcon->retry) { rc = -EHOSTDOWN; @@ -284,6 +309,7 @@ smb2_reconnect(__le16 smb2_command, struct cifs_tcon *tcon, goto out; } +skip_sess_setup: cifs_mark_open_files_invalid(tcon); if (tcon->use_persistent) tcon->need_reopen_files = true; @@ -1359,13 +1385,19 @@ SMB2_sess_establish_session(struct SMB2_sess_data *sess_data) mutex_unlock(&server->srv_mutex); cifs_dbg(FYI, "SMB2/3 session established successfully\n"); + + spin_lock(&ses->chan_lock); + if (ses->binding) + cifs_chan_clear_need_reconnect(ses, ses->binding_chan->server); + else + cifs_chan_clear_need_reconnect(ses, ses->server); + spin_unlock(&ses->chan_lock); + /* keep existing ses state if binding */ - if (!ses->binding) { - spin_lock(&GlobalMid_Lock); + spin_lock(&GlobalMid_Lock); + if (!ses->binding) ses->status = CifsGood; - ses->need_reconnect = false; - spin_unlock(&GlobalMid_Lock); - } + spin_unlock(&GlobalMid_Lock); return rc; } @@ -1704,8 +1736,12 @@ SMB2_logoff(const unsigned int xid, struct cifs_ses *ses) return -EIO; /* no need to send SMB logoff if uid already closed due to reconnect */ - if (ses->need_reconnect) + spin_lock(&ses->chan_lock); + if (CIFS_ALL_CHANS_NEED_RECONNECT(ses)) { + spin_unlock(&ses->chan_lock); goto smb2_session_already_dead; + } + spin_unlock(&ses->chan_lock); rc = smb2_plain_req_init(SMB2_LOGOFF, NULL, ses->server, (void **) &req, &total_len); @@ -1913,8 +1949,13 @@ SMB2_tdis(const unsigned int xid, struct cifs_tcon *tcon) if (!ses || !(ses->server)) return -EIO; - if ((tcon->need_reconnect) || (tcon->ses->need_reconnect)) + spin_lock(&ses->chan_lock); + if ((tcon->need_reconnect) || + (CIFS_ALL_CHANS_NEED_RECONNECT(tcon->ses))) { + spin_unlock(&ses->chan_lock); return 0; + } + spin_unlock(&ses->chan_lock); close_cached_dir_lease(&tcon->crfid); From f486ef8e2003f6c308d0db81ea116c880a760d4f Mon Sep 17 00:00:00 2001 From: Shyam Prasad N Date: Mon, 19 Jul 2021 13:54:16 +0000 Subject: [PATCH 160/493] cifs: use the chans_need_reconnect bitmap for reconnect status We use the concept of "binding" when one of the secondary channel is in the process of connecting/reconnecting to the server. Till this binding process completes, and the channel is bound to an existing session, we redirect traffic from other established channels on the binding channel, effectively blocking all traffic till individual channels get reconnected. With my last set of commits, we can get rid of this binding serialization. We now have a bitmap of connection states for each channel. We will use this bitmap instead for tracking channel status. Having a bitmap also now enables us to keep the session alive, as long as even a single channel underneath is alive. Unfortunately, this also meant that we need to supply the tcp connection info for the channel during all negotiate and session setup functions. These changes have resulted in a slightly bigger code churn. However, I expect perf and robustness improvements in the mchan scenario after this change. Signed-off-by: Shyam Prasad N Signed-off-by: Steve French --- fs/cifs/cifs_spnego.c | 4 +- fs/cifs/cifs_spnego.h | 3 +- fs/cifs/cifsglob.h | 48 ++++------------ fs/cifs/cifsproto.h | 15 +++-- fs/cifs/cifssmb.c | 12 ++-- fs/cifs/connect.c | 36 ++++++++---- fs/cifs/ntlmssp.h | 2 + fs/cifs/sess.c | 123 +++++++++++++++++++--------------------- fs/cifs/smb1ops.c | 10 ++-- fs/cifs/smb2misc.c | 4 +- fs/cifs/smb2ops.c | 8 ++- fs/cifs/smb2pdu.c | 114 +++++++++++++++++++++---------------- fs/cifs/smb2proto.h | 6 +- fs/cifs/smb2transport.c | 30 ++++++---- fs/cifs/transport.c | 28 ++++----- 15 files changed, 236 insertions(+), 207 deletions(-) diff --git a/fs/cifs/cifs_spnego.c b/fs/cifs/cifs_spnego.c index 353bd0dd7026..342717bf1dc2 100644 --- a/fs/cifs/cifs_spnego.c +++ b/fs/cifs/cifs_spnego.c @@ -84,9 +84,9 @@ struct key_type cifs_spnego_key_type = { /* get a key struct with a SPNEGO security blob, suitable for session setup */ struct key * -cifs_get_spnego_key(struct cifs_ses *sesInfo) +cifs_get_spnego_key(struct cifs_ses *sesInfo, + struct TCP_Server_Info *server) { - struct TCP_Server_Info *server = cifs_ses_server(sesInfo); struct sockaddr_in *sa = (struct sockaddr_in *) &server->dstaddr; struct sockaddr_in6 *sa6 = (struct sockaddr_in6 *) &server->dstaddr; char *description, *dp; diff --git a/fs/cifs/cifs_spnego.h b/fs/cifs/cifs_spnego.h index e6a0451877d4..7f102ffeb675 100644 --- a/fs/cifs/cifs_spnego.h +++ b/fs/cifs/cifs_spnego.h @@ -29,7 +29,8 @@ struct cifs_spnego_msg { #ifdef __KERNEL__ extern struct key_type cifs_spnego_key_type; -extern struct key *cifs_get_spnego_key(struct cifs_ses *sesInfo); +extern struct key *cifs_get_spnego_key(struct cifs_ses *sesInfo, + struct TCP_Server_Info *server); #endif /* KERNEL */ #endif /* _CIFS_SPNEGO_H */ diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index 91878e84c637..76b4adc7d738 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h @@ -263,13 +263,16 @@ struct smb_version_operations { /* check if we need to negotiate */ bool (*need_neg)(struct TCP_Server_Info *); /* negotiate to the server */ - int (*negotiate)(const unsigned int, struct cifs_ses *); + int (*negotiate)(const unsigned int xid, + struct cifs_ses *ses, + struct TCP_Server_Info *server); /* set negotiated write size */ unsigned int (*negotiate_wsize)(struct cifs_tcon *tcon, struct smb3_fs_context *ctx); /* set negotiated read size */ unsigned int (*negotiate_rsize)(struct cifs_tcon *tcon, struct smb3_fs_context *ctx); /* setup smb sessionn */ int (*sess_setup)(const unsigned int, struct cifs_ses *, + struct TCP_Server_Info *server, const struct nls_table *); /* close smb session */ int (*logoff)(const unsigned int, struct cifs_ses *); @@ -414,7 +417,8 @@ struct smb_version_operations { void (*set_lease_key)(struct inode *, struct cifs_fid *); /* generate new lease key */ void (*new_lease_key)(struct cifs_fid *); - int (*generate_signingkey)(struct cifs_ses *); + int (*generate_signingkey)(struct cifs_ses *ses, + struct TCP_Server_Info *server); int (*calc_signature)(struct smb_rqst *, struct TCP_Server_Info *, bool allocate_crypto); int (*set_integrity)(const unsigned int, struct cifs_tcon *tcon, @@ -940,15 +944,12 @@ struct cifs_ses { enum securityEnum sectype; /* what security flavor was specified? */ bool sign; /* is signing required? */ bool domainAuto:1; - bool binding:1; /* are we binding the session? */ __u16 session_flags; __u8 smb3signingkey[SMB3_SIGN_KEY_SIZE]; __u8 smb3encryptionkey[SMB3_ENC_DEC_KEY_SIZE]; __u8 smb3decryptionkey[SMB3_ENC_DEC_KEY_SIZE]; __u8 preauth_sha_hash[SMB2_PREAUTH_HASH_SIZE]; - __u8 binding_preauth_sha_hash[SMB2_PREAUTH_HASH_SIZE]; - /* * Network interfaces available on the server this session is * connected to. @@ -976,7 +977,6 @@ struct cifs_ses { test_bit((index), &(ses)->chans_need_reconnect) struct cifs_chan chans[CIFS_MAX_CHANNELS]; - struct cifs_chan *binding_chan; size_t chan_count; size_t chan_max; atomic_t chan_seq; /* round robin state */ @@ -985,42 +985,16 @@ struct cifs_ses { * chans_need_reconnect is a bitmap indicating which of the channels * under this smb session needs to be reconnected. * If not multichannel session, only one bit will be used. + * + * We will ask for sess and tcon reconnection only if all the + * channels are marked for needing reconnection. This will + * enable the sessions on top to continue to live till any + * of the channels below are active. */ unsigned long chans_need_reconnect; /* ========= end: protected by chan_lock ======== */ }; -/* - * When binding a new channel, we need to access the channel which isn't fully - * established yet. - */ - -static inline -struct cifs_chan *cifs_ses_binding_channel(struct cifs_ses *ses) -{ - if (ses->binding) - return ses->binding_chan; - else - return NULL; -} - -/* - * Returns the server pointer of the session. When binding a new - * channel this returns the last channel which isn't fully established - * yet. - * - * This function should be use for negprot/sess.setup codepaths. For - * the other requests see cifs_pick_channel(). - */ -static inline -struct TCP_Server_Info *cifs_ses_server(struct cifs_ses *ses) -{ - if (ses->binding) - return ses->binding_chan->server; - else - return ses->server; -} - static inline bool cap_unix(struct cifs_ses *ses) { diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h index 2a821a8801d2..09356a9abe9c 100644 --- a/fs/cifs/cifsproto.h +++ b/fs/cifs/cifsproto.h @@ -164,6 +164,7 @@ extern int small_smb_init_no_tc(const int smb_cmd, const int wct, extern enum securityEnum select_sectype(struct TCP_Server_Info *server, enum securityEnum requested); extern int CIFS_SessSetup(const unsigned int xid, struct cifs_ses *ses, + struct TCP_Server_Info *server, const struct nls_table *nls_cp); extern struct timespec64 cifs_NTtimeToUnix(__le64 utc_nanoseconds_since_1601); extern u64 cifs_UnixTimeToNT(struct timespec64); @@ -293,11 +294,15 @@ extern int cifs_tree_connect(const unsigned int xid, struct cifs_tcon *tcon, const struct nls_table *nlsc); extern int cifs_negotiate_protocol(const unsigned int xid, - struct cifs_ses *ses); + struct cifs_ses *ses, + struct TCP_Server_Info *server); extern int cifs_setup_session(const unsigned int xid, struct cifs_ses *ses, + struct TCP_Server_Info *server, struct nls_table *nls_info); extern int cifs_enable_signing(struct TCP_Server_Info *server, bool mnt_sign_required); -extern int CIFSSMBNegotiate(const unsigned int xid, struct cifs_ses *ses); +extern int CIFSSMBNegotiate(const unsigned int xid, + struct cifs_ses *ses, + struct TCP_Server_Info *server); extern int CIFSTCon(const unsigned int xid, struct cifs_ses *ses, const char *tree, struct cifs_tcon *tcon, @@ -504,8 +509,10 @@ extern int cifs_verify_signature(struct smb_rqst *rqst, extern int setup_ntlmv2_rsp(struct cifs_ses *, const struct nls_table *); extern void cifs_crypto_secmech_release(struct TCP_Server_Info *server); extern int calc_seckey(struct cifs_ses *); -extern int generate_smb30signingkey(struct cifs_ses *); -extern int generate_smb311signingkey(struct cifs_ses *); +extern int generate_smb30signingkey(struct cifs_ses *ses, + struct TCP_Server_Info *server); +extern int generate_smb311signingkey(struct cifs_ses *ses, + struct TCP_Server_Info *server); extern int CIFSSMBCopy(unsigned int xid, struct cifs_tcon *source_tcon, diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c index cbb0d55c1267..8b3e5be483bc 100644 --- a/fs/cifs/cifssmb.c +++ b/fs/cifs/cifssmb.c @@ -199,7 +199,8 @@ cifs_reconnect_tcon(struct cifs_tcon *tcon, int smb_command) spin_lock(&ses->chan_lock); if (!cifs_chan_needs_reconnect(ses, server)) { spin_unlock(&ses->chan_lock); - /* this just means that we only need to tcon */ + + /* this means that we only need to tree connect */ if (tcon->need_reconnect) goto skip_sess_setup; @@ -209,9 +210,9 @@ cifs_reconnect_tcon(struct cifs_tcon *tcon, int smb_command) } spin_unlock(&ses->chan_lock); - rc = cifs_negotiate_protocol(0, ses); + rc = cifs_negotiate_protocol(0, ses, server); if (!rc) - rc = cifs_setup_session(0, ses, nls_codepage); + rc = cifs_setup_session(0, ses, server, nls_codepage); /* do we need to reconnect tcon? */ if (rc || !tcon->need_reconnect) { @@ -503,14 +504,15 @@ should_set_ext_sec_flag(enum securityEnum sectype) } int -CIFSSMBNegotiate(const unsigned int xid, struct cifs_ses *ses) +CIFSSMBNegotiate(const unsigned int xid, + struct cifs_ses *ses, + struct TCP_Server_Info *server) { NEGOTIATE_REQ *pSMB; NEGOTIATE_RSP *pSMBr; int rc = 0; int bytes_returned; int i; - struct TCP_Server_Info *server = ses->server; u16 count; if (!server) { diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index fa80a23f9fcf..9ee5856d63cc 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -169,6 +169,7 @@ static void cifs_resolve_server(struct work_struct *work) */ static void cifs_mark_tcp_ses_conns_for_reconnect(struct TCP_Server_Info *server) { + unsigned int num_sessions = 0; struct cifs_ses *ses; struct cifs_tcon *tcon; struct mid_q_entry *mid, *nmid; @@ -201,6 +202,8 @@ static void cifs_mark_tcp_ses_conns_for_reconnect(struct TCP_Server_Info *server if (!CIFS_ALL_CHANS_NEED_RECONNECT(ses)) goto next_session; + num_sessions++; + list_for_each_entry(tcon, &ses->tcon_list, tcon_list) tcon->need_reconnect = true; if (ses->tcon_ipc) @@ -211,6 +214,14 @@ next_session: } spin_unlock(&cifs_tcp_ses_lock); + if (num_sessions == 0) + return; + /* + * before reconnecting the tcp session, mark the smb session (uid) + * and the tid bad so they are not used until reconnected + */ + cifs_dbg(FYI, "%s: marking sessions and tcons for reconnect\n", + __func__); /* do not want to be sending data on a socket we are freeing */ cifs_dbg(FYI, "%s: tearing down socket\n", __func__); mutex_lock(&server->srv_mutex); @@ -2005,7 +2016,7 @@ cifs_get_smb_ses(struct TCP_Server_Info *server, struct smb3_fs_context *ctx) spin_unlock(&ses->chan_lock); cifs_dbg(FYI, "Session needs reconnect\n"); - rc = cifs_negotiate_protocol(xid, ses); + rc = cifs_negotiate_protocol(xid, ses, server); if (rc) { mutex_unlock(&ses->session_mutex); /* problem -- put our ses reference */ @@ -2014,7 +2025,7 @@ cifs_get_smb_ses(struct TCP_Server_Info *server, struct smb3_fs_context *ctx) return ERR_PTR(rc); } - rc = cifs_setup_session(xid, ses, + rc = cifs_setup_session(xid, ses, server, ctx->local_nls); if (rc) { mutex_unlock(&ses->session_mutex); @@ -2086,9 +2097,9 @@ cifs_get_smb_ses(struct TCP_Server_Info *server, struct smb3_fs_context *ctx) ses->chans_need_reconnect = 1; spin_unlock(&ses->chan_lock); - rc = cifs_negotiate_protocol(xid, ses); + rc = cifs_negotiate_protocol(xid, ses, server); if (!rc) - rc = cifs_setup_session(xid, ses, ctx->local_nls); + rc = cifs_setup_session(xid, ses, server, ctx->local_nls); /* each channel uses a different signing key */ memcpy(ses->chans[0].signkey, ses->smb3signingkey, @@ -3820,10 +3831,10 @@ cifs_umount(struct cifs_sb_info *cifs_sb) } int -cifs_negotiate_protocol(const unsigned int xid, struct cifs_ses *ses) +cifs_negotiate_protocol(const unsigned int xid, struct cifs_ses *ses, + struct TCP_Server_Info *server) { int rc = 0; - struct TCP_Server_Info *server = cifs_ses_server(ses); if (!server->ops->need_neg || !server->ops->negotiate) return -ENOSYS; @@ -3832,7 +3843,7 @@ cifs_negotiate_protocol(const unsigned int xid, struct cifs_ses *ses) if (!server->ops->need_neg(server)) return 0; - rc = server->ops->negotiate(xid, ses); + rc = server->ops->negotiate(xid, ses, server); if (rc == 0) { spin_lock(&GlobalMid_Lock); if (server->tcpStatus == CifsNeedNegotiate) @@ -3847,12 +3858,17 @@ cifs_negotiate_protocol(const unsigned int xid, struct cifs_ses *ses) int cifs_setup_session(const unsigned int xid, struct cifs_ses *ses, + struct TCP_Server_Info *server, struct nls_table *nls_info) { int rc = -ENOSYS; - struct TCP_Server_Info *server = cifs_ses_server(ses); + bool is_binding = false; - if (!ses->binding) { + spin_lock(&ses->chan_lock); + is_binding = !CIFS_ALL_CHANS_NEED_RECONNECT(ses); + spin_unlock(&ses->chan_lock); + + if (!is_binding) { ses->capabilities = server->capabilities; if (!linuxExtEnabled) ses->capabilities &= (~server->vals->cap_unix); @@ -3870,7 +3886,7 @@ cifs_setup_session(const unsigned int xid, struct cifs_ses *ses, server->sec_mode, server->capabilities, server->timeAdj); if (server->ops->sess_setup) - rc = server->ops->sess_setup(xid, ses, nls_info); + rc = server->ops->sess_setup(xid, ses, server, nls_info); if (rc) cifs_server_dbg(VFS, "Send error in SessSetup = %d\n", rc); diff --git a/fs/cifs/ntlmssp.h b/fs/cifs/ntlmssp.h index fe707f45da89..6d242af536cb 100644 --- a/fs/cifs/ntlmssp.h +++ b/fs/cifs/ntlmssp.h @@ -121,7 +121,9 @@ typedef struct _AUTHENTICATE_MESSAGE { int decode_ntlmssp_challenge(char *bcc_ptr, int blob_len, struct cifs_ses *ses); int build_ntlmssp_negotiate_blob(unsigned char **pbuffer, u16 *buflen, struct cifs_ses *ses, + struct TCP_Server_Info *server, const struct nls_table *nls_cp); int build_ntlmssp_auth_blob(unsigned char **pbuffer, u16 *buflen, struct cifs_ses *ses, + struct TCP_Server_Info *server, const struct nls_table *nls_cp); diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c index ba8543ccb298..b121a2591e69 100644 --- a/fs/cifs/sess.c +++ b/fs/cifs/sess.c @@ -310,7 +310,7 @@ cifs_ses_add_channel(struct cifs_sb_info *cifs_sb, struct cifs_ses *ses, mutex_lock(&ses->session_mutex); spin_lock(&ses->chan_lock); - chan = ses->binding_chan = &ses->chans[ses->chan_count]; + chan = &ses->chans[ses->chan_count]; chan->server = chan_server; if (IS_ERR(chan->server)) { rc = PTR_ERR(chan->server); @@ -318,6 +318,12 @@ cifs_ses_add_channel(struct cifs_sb_info *cifs_sb, struct cifs_ses *ses, spin_unlock(&ses->chan_lock); goto out; } + ses->chan_count++; + atomic_set(&ses->chan_seq, 0); + + /* Mark this channel as needing connect/setup */ + cifs_chan_set_need_reconnect(ses, chan->server); + spin_unlock(&ses->chan_lock); /* @@ -331,41 +337,17 @@ cifs_ses_add_channel(struct cifs_sb_info *cifs_sb, struct cifs_ses *ses, goto out; } - ses->binding = true; - rc = cifs_negotiate_protocol(xid, ses); - if (rc) - goto out; - - rc = cifs_setup_session(xid, ses, cifs_sb->local_nls); - if (rc) - goto out; - - /* success, put it on the list - * XXX: sharing ses between 2 tcp servers is not possible, the - * way "internal" linked lists works in linux makes element - * only able to belong to one list - * - * the binding session is already established so the rest of - * the code should be able to look it up, no need to add the - * ses to the new server. - */ - - spin_lock(&ses->chan_lock); - ses->chan_count++; - atomic_set(&ses->chan_seq, 0); - - /* Mark this channel as needing connect/setup */ - cifs_chan_set_need_reconnect(ses, chan->server); - spin_unlock(&ses->chan_lock); + rc = cifs_negotiate_protocol(xid, ses, chan->server); + if (!rc) + rc = cifs_setup_session(xid, ses, chan->server, cifs_sb->local_nls); out: - ses->binding = false; - ses->binding_chan = NULL; - if (rc && chan->server) { + spin_lock(&ses->chan_lock); /* we rely on all bits beyond chan_count to be clear */ cifs_chan_clear_need_reconnect(ses, chan->server); ses->chan_count--; + spin_unlock(&ses->chan_lock); } mutex_unlock(&ses->session_mutex); @@ -389,7 +371,9 @@ void cifs_ses_mark_for_reconnect(struct cifs_ses *ses) } } -static __u32 cifs_ssetup_hdr(struct cifs_ses *ses, SESSION_SETUP_ANDX *pSMB) +static __u32 cifs_ssetup_hdr(struct cifs_ses *ses, + struct TCP_Server_Info *server, + SESSION_SETUP_ANDX *pSMB) { __u32 capabilities = 0; @@ -402,7 +386,7 @@ static __u32 cifs_ssetup_hdr(struct cifs_ses *ses, SESSION_SETUP_ANDX *pSMB) pSMB->req.MaxBufferSize = cpu_to_le16(min_t(u32, CIFSMaxBufSize + MAX_CIFS_HDR_SIZE - 4, USHRT_MAX)); - pSMB->req.MaxMpxCount = cpu_to_le16(ses->server->maxReq); + pSMB->req.MaxMpxCount = cpu_to_le16(server->maxReq); pSMB->req.VcNumber = cpu_to_le16(1); /* Now no need to set SMBFLG_CASELESS or obsolete CANONICAL PATH */ @@ -413,7 +397,7 @@ static __u32 cifs_ssetup_hdr(struct cifs_ses *ses, SESSION_SETUP_ANDX *pSMB) capabilities = CAP_LARGE_FILES | CAP_NT_SMBS | CAP_LEVEL_II_OPLOCKS | CAP_LARGE_WRITE_X | CAP_LARGE_READ_X; - if (ses->server->sign) + if (server->sign) pSMB->req.hdr.Flags2 |= SMBFLG2_SECURITY_SIGNATURE; if (ses->capabilities & CAP_UNICODE) { @@ -776,10 +760,10 @@ static inline void cifs_security_buffer_from_str(SECURITY_BUFFER *pbuf, int build_ntlmssp_negotiate_blob(unsigned char **pbuffer, u16 *buflen, struct cifs_ses *ses, + struct TCP_Server_Info *server, const struct nls_table *nls_cp) { int rc = 0; - struct TCP_Server_Info *server = cifs_ses_server(ses); NEGOTIATE_MESSAGE *sec_blob; __u32 flags; unsigned char *tmp; @@ -833,6 +817,7 @@ setup_ntlm_neg_ret: int build_ntlmssp_auth_blob(unsigned char **pbuffer, u16 *buflen, struct cifs_ses *ses, + struct TCP_Server_Info *server, const struct nls_table *nls_cp) { int rc; @@ -969,6 +954,7 @@ cifs_select_sectype(struct TCP_Server_Info *server, enum securityEnum requested) struct sess_data { unsigned int xid; struct cifs_ses *ses; + struct TCP_Server_Info *server; struct nls_table *nls_cp; void (*func)(struct sess_data *); int result; @@ -1035,35 +1021,34 @@ static int sess_establish_session(struct sess_data *sess_data) { struct cifs_ses *ses = sess_data->ses; + struct TCP_Server_Info *server = sess_data->server; - mutex_lock(&ses->server->srv_mutex); - if (!ses->server->session_estab) { - if (ses->server->sign) { - ses->server->session_key.response = + mutex_lock(&server->srv_mutex); + if (!server->session_estab) { + if (server->sign) { + server->session_key.response = kmemdup(ses->auth_key.response, ses->auth_key.len, GFP_KERNEL); - if (!ses->server->session_key.response) { - mutex_unlock(&ses->server->srv_mutex); + if (!server->session_key.response) { + mutex_unlock(&server->srv_mutex); return -ENOMEM; } - ses->server->session_key.len = + server->session_key.len = ses->auth_key.len; } - ses->server->sequence_number = 0x2; - ses->server->session_estab = true; + server->sequence_number = 0x2; + server->session_estab = true; } - mutex_unlock(&ses->server->srv_mutex); + mutex_unlock(&server->srv_mutex); cifs_dbg(FYI, "CIFS session established successfully\n"); - if (ses->binding) - cifs_chan_clear_need_reconnect(ses, ses->binding_chan->server); - else - cifs_chan_clear_need_reconnect(ses, ses->server); + spin_lock(&ses->chan_lock); + cifs_chan_clear_need_reconnect(ses, server); + spin_unlock(&ses->chan_lock); - /* keep existing ses state if binding */ + /* Even if one channel is active, session is in good state */ spin_lock(&GlobalMid_Lock); - if (!ses->binding) - ses->status = CifsGood; + ses->status = CifsGood; spin_unlock(&GlobalMid_Lock); return 0; @@ -1099,6 +1084,7 @@ sess_auth_ntlmv2(struct sess_data *sess_data) SESSION_SETUP_ANDX *pSMB; char *bcc_ptr; struct cifs_ses *ses = sess_data->ses; + struct TCP_Server_Info *server = sess_data->server; __u32 capabilities; __u16 bytes_remaining; @@ -1110,7 +1096,7 @@ sess_auth_ntlmv2(struct sess_data *sess_data) pSMB = (SESSION_SETUP_ANDX *)sess_data->iov[0].iov_base; bcc_ptr = sess_data->iov[2].iov_base; - capabilities = cifs_ssetup_hdr(ses, pSMB); + capabilities = cifs_ssetup_hdr(ses, server, pSMB); pSMB->req_no_secext.Capabilities = cpu_to_le32(capabilities); @@ -1208,6 +1194,7 @@ sess_auth_kerberos(struct sess_data *sess_data) SESSION_SETUP_ANDX *pSMB; char *bcc_ptr; struct cifs_ses *ses = sess_data->ses; + struct TCP_Server_Info *server = sess_data->server; __u32 capabilities; __u16 bytes_remaining; struct key *spnego_key = NULL; @@ -1222,9 +1209,9 @@ sess_auth_kerberos(struct sess_data *sess_data) pSMB = (SESSION_SETUP_ANDX *)sess_data->iov[0].iov_base; bcc_ptr = sess_data->iov[2].iov_base; - capabilities = cifs_ssetup_hdr(ses, pSMB); + capabilities = cifs_ssetup_hdr(ses, server, pSMB); - spnego_key = cifs_get_spnego_key(ses); + spnego_key = cifs_get_spnego_key(ses, server); if (IS_ERR(spnego_key)) { rc = PTR_ERR(spnego_key); spnego_key = NULL; @@ -1348,12 +1335,13 @@ _sess_auth_rawntlmssp_assemble_req(struct sess_data *sess_data) { SESSION_SETUP_ANDX *pSMB; struct cifs_ses *ses = sess_data->ses; + struct TCP_Server_Info *server = sess_data->server; __u32 capabilities; char *bcc_ptr; pSMB = (SESSION_SETUP_ANDX *)sess_data->iov[0].iov_base; - capabilities = cifs_ssetup_hdr(ses, pSMB); + capabilities = cifs_ssetup_hdr(ses, server, pSMB); if ((pSMB->req.hdr.Flags2 & SMBFLG2_UNICODE) == 0) { cifs_dbg(VFS, "NTLMSSP requires Unicode support\n"); return -ENOSYS; @@ -1387,6 +1375,7 @@ sess_auth_rawntlmssp_negotiate(struct sess_data *sess_data) struct smb_hdr *smb_buf; SESSION_SETUP_ANDX *pSMB; struct cifs_ses *ses = sess_data->ses; + struct TCP_Server_Info *server = sess_data->server; __u16 bytes_remaining; char *bcc_ptr; unsigned char *ntlmsspblob = NULL; @@ -1414,7 +1403,7 @@ sess_auth_rawntlmssp_negotiate(struct sess_data *sess_data) /* Build security blob before we assemble the request */ rc = build_ntlmssp_negotiate_blob(&ntlmsspblob, - &blob_len, ses, + &blob_len, ses, server, sess_data->nls_cp); if (rc) goto out; @@ -1489,6 +1478,7 @@ sess_auth_rawntlmssp_authenticate(struct sess_data *sess_data) struct smb_hdr *smb_buf; SESSION_SETUP_ANDX *pSMB; struct cifs_ses *ses = sess_data->ses; + struct TCP_Server_Info *server = sess_data->server; __u16 bytes_remaining; char *bcc_ptr; unsigned char *ntlmsspblob = NULL; @@ -1505,7 +1495,8 @@ sess_auth_rawntlmssp_authenticate(struct sess_data *sess_data) pSMB = (SESSION_SETUP_ANDX *)sess_data->iov[0].iov_base; smb_buf = (struct smb_hdr *)pSMB; rc = build_ntlmssp_auth_blob(&ntlmsspblob, - &blob_len, ses, sess_data->nls_cp); + &blob_len, ses, server, + sess_data->nls_cp); if (rc) goto out_free_ntlmsspblob; sess_data->iov[1].iov_len = blob_len; @@ -1589,11 +1580,13 @@ out: sess_data->result = rc; } -static int select_sec(struct cifs_ses *ses, struct sess_data *sess_data) +static int select_sec(struct sess_data *sess_data) { int type; + struct cifs_ses *ses = sess_data->ses; + struct TCP_Server_Info *server = sess_data->server; - type = cifs_select_sectype(ses->server, ses->sectype); + type = cifs_select_sectype(server, ses->sectype); cifs_dbg(FYI, "sess setup type %d\n", type); if (type == Unspecified) { cifs_dbg(VFS, "Unable to select appropriate authentication method!\n"); @@ -1624,7 +1617,8 @@ static int select_sec(struct cifs_ses *ses, struct sess_data *sess_data) } int CIFS_SessSetup(const unsigned int xid, struct cifs_ses *ses, - const struct nls_table *nls_cp) + struct TCP_Server_Info *server, + const struct nls_table *nls_cp) { int rc = 0; struct sess_data *sess_data; @@ -1638,15 +1632,16 @@ int CIFS_SessSetup(const unsigned int xid, struct cifs_ses *ses, if (!sess_data) return -ENOMEM; - rc = select_sec(ses, sess_data); - if (rc) - goto out; - sess_data->xid = xid; sess_data->ses = ses; + sess_data->server = server; sess_data->buf0_type = CIFS_NO_BUFFER; sess_data->nls_cp = (struct nls_table *) nls_cp; + rc = select_sec(sess_data); + if (rc) + goto out; + while (sess_data->func) sess_data->func(sess_data); diff --git a/fs/cifs/smb1ops.c b/fs/cifs/smb1ops.c index 3b83839fc2c2..5366202d343d 100644 --- a/fs/cifs/smb1ops.c +++ b/fs/cifs/smb1ops.c @@ -414,14 +414,16 @@ cifs_need_neg(struct TCP_Server_Info *server) } static int -cifs_negotiate(const unsigned int xid, struct cifs_ses *ses) +cifs_negotiate(const unsigned int xid, + struct cifs_ses *ses, + struct TCP_Server_Info *server) { int rc; - rc = CIFSSMBNegotiate(xid, ses); + rc = CIFSSMBNegotiate(xid, ses, server); if (rc == -EAGAIN) { /* retry only once on 1st time connection */ - set_credits(ses->server, 1); - rc = CIFSSMBNegotiate(xid, ses); + set_credits(server, 1); + rc = CIFSSMBNegotiate(xid, ses, server); if (rc == -EAGAIN) rc = -EHOSTDOWN; } diff --git a/fs/cifs/smb2misc.c b/fs/cifs/smb2misc.c index cdcdef32759e..396d5afa7cf1 100644 --- a/fs/cifs/smb2misc.c +++ b/fs/cifs/smb2misc.c @@ -851,12 +851,12 @@ smb2_handle_cancelled_mid(struct mid_q_entry *mid, struct TCP_Server_Info *serve * @nvec: number of array entries for the iov */ int -smb311_update_preauth_hash(struct cifs_ses *ses, struct kvec *iov, int nvec) +smb311_update_preauth_hash(struct cifs_ses *ses, struct TCP_Server_Info *server, + struct kvec *iov, int nvec) { int i, rc; struct sdesc *d; struct smb2_hdr *hdr; - struct TCP_Server_Info *server = cifs_ses_server(ses); hdr = (struct smb2_hdr *)iov[0].iov_base; /* neg prot are always taken */ diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c index c5b1dea54ebc..c97dd9758c69 100644 --- a/fs/cifs/smb2ops.c +++ b/fs/cifs/smb2ops.c @@ -384,14 +384,16 @@ smb2_need_neg(struct TCP_Server_Info *server) } static int -smb2_negotiate(const unsigned int xid, struct cifs_ses *ses) +smb2_negotiate(const unsigned int xid, + struct cifs_ses *ses, + struct TCP_Server_Info *server) { int rc; spin_lock(&GlobalMid_Lock); - cifs_ses_server(ses)->CurrentMid = 0; + server->CurrentMid = 0; spin_unlock(&GlobalMid_Lock); - rc = SMB2_negotiate(xid, ses); + rc = SMB2_negotiate(xid, ses, server); /* BB we probably don't need to retry with modern servers */ if (rc == -EAGAIN) rc = -EHOSTDOWN; diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index f3b2eef36ec1..9e7b213dbef5 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -248,7 +248,7 @@ smb2_reconnect(__le16 smb2_command, struct cifs_tcon *tcon, * need to prevent multiple threads trying to simultaneously reconnect * the same SMB session */ - mutex_lock(&tcon->ses->session_mutex); + mutex_lock(&ses->session_mutex); /* * Recheck after acquire mutex. If another thread is negotiating @@ -257,18 +257,10 @@ smb2_reconnect(__le16 smb2_command, struct cifs_tcon *tcon, */ if (server->tcpStatus == CifsNeedReconnect) { rc = -EHOSTDOWN; - mutex_unlock(&tcon->ses->session_mutex); + mutex_unlock(&ses->session_mutex); goto out; } - /* - * If we are reconnecting an extra channel, bind - */ - if (CIFS_SERVER_IS_CHAN(server)) { - ses->binding = true; - ses->binding_chan = cifs_ses_find_chan(ses, server); - } - /* * need to prevent multiple threads trying to simultaneously * reconnect the same SMB session @@ -277,7 +269,7 @@ smb2_reconnect(__le16 smb2_command, struct cifs_tcon *tcon, if (!cifs_chan_needs_reconnect(ses, server)) { spin_unlock(&ses->chan_lock); - /* this just means that we only need to tcon */ + /* this means that we only need to tree connect */ if (tcon->need_reconnect) goto skip_sess_setup; @@ -287,22 +279,15 @@ smb2_reconnect(__le16 smb2_command, struct cifs_tcon *tcon, } spin_unlock(&ses->chan_lock); - rc = cifs_negotiate_protocol(0, tcon->ses); + rc = cifs_negotiate_protocol(0, ses, server); if (!rc) { - rc = cifs_setup_session(0, tcon->ses, nls_codepage); + rc = cifs_setup_session(0, ses, server, nls_codepage); if ((rc == -EACCES) && !tcon->retry) { rc = -EHOSTDOWN; - ses->binding = false; - ses->binding_chan = NULL; - mutex_unlock(&tcon->ses->session_mutex); + mutex_unlock(&ses->session_mutex); goto failed; } } - /* - * End of channel binding - */ - ses->binding = false; - ses->binding_chan = NULL; if (rc || !tcon->need_reconnect) { mutex_unlock(&tcon->ses->session_mutex); @@ -859,7 +844,9 @@ add_posix_context(struct kvec *iov, unsigned int *num_iovec, umode_t mode) */ int -SMB2_negotiate(const unsigned int xid, struct cifs_ses *ses) +SMB2_negotiate(const unsigned int xid, + struct cifs_ses *ses, + struct TCP_Server_Info *server) { struct smb_rqst rqst; struct smb2_negotiate_req *req; @@ -868,7 +855,6 @@ SMB2_negotiate(const unsigned int xid, struct cifs_ses *ses) struct kvec rsp_iov; int rc = 0; int resp_buftype; - struct TCP_Server_Info *server = cifs_ses_server(ses); int blob_offset, blob_length; char *security_blob; int flags = CIFS_NEG_OP; @@ -1247,6 +1233,7 @@ smb2_select_sectype(struct TCP_Server_Info *server, enum securityEnum requested) struct SMB2_sess_data { unsigned int xid; struct cifs_ses *ses; + struct TCP_Server_Info *server; struct nls_table *nls_cp; void (*func)(struct SMB2_sess_data *); int result; @@ -1268,9 +1255,10 @@ SMB2_sess_alloc_buffer(struct SMB2_sess_data *sess_data) { int rc; struct cifs_ses *ses = sess_data->ses; + struct TCP_Server_Info *server = sess_data->server; struct smb2_sess_setup_req *req; - struct TCP_Server_Info *server = cifs_ses_server(ses); unsigned int total_len; + bool is_binding = false; rc = smb2_plain_req_init(SMB2_SESSION_SETUP, NULL, server, (void **) &req, @@ -1278,11 +1266,16 @@ SMB2_sess_alloc_buffer(struct SMB2_sess_data *sess_data) if (rc) return rc; - if (sess_data->ses->binding) { - req->hdr.SessionId = cpu_to_le64(sess_data->ses->Suid); + spin_lock(&ses->chan_lock); + is_binding = !CIFS_ALL_CHANS_NEED_RECONNECT(ses); + spin_unlock(&ses->chan_lock); + + if (is_binding) { + req->hdr.SessionId = cpu_to_le64(ses->Suid); req->hdr.Flags |= SMB2_FLAGS_SIGNED; req->PreviousSessionId = 0; req->Flags = SMB2_SESSION_REQ_FLAG_BINDING; + cifs_dbg(FYI, "Binding to sess id: %llx\n", ses->Suid); } else { /* First session, not a reauthenticate */ req->hdr.SessionId = 0; @@ -1292,6 +1285,8 @@ SMB2_sess_alloc_buffer(struct SMB2_sess_data *sess_data) */ req->PreviousSessionId = cpu_to_le64(sess_data->previous_session); req->Flags = 0; /* MBZ */ + cifs_dbg(FYI, "Fresh session. Previous: %llx\n", + sess_data->previous_session); } /* enough to enable echos and oplocks and one max size write */ @@ -1351,7 +1346,7 @@ SMB2_sess_sendreceive(struct SMB2_sess_data *sess_data) /* BB add code to build os and lm fields */ rc = cifs_send_recv(sess_data->xid, sess_data->ses, - cifs_ses_server(sess_data->ses), + sess_data->server, &rqst, &sess_data->buf0_type, CIFS_LOG_ERROR | CIFS_SESS_OP, &rsp_iov); @@ -1366,11 +1361,11 @@ SMB2_sess_establish_session(struct SMB2_sess_data *sess_data) { int rc = 0; struct cifs_ses *ses = sess_data->ses; - struct TCP_Server_Info *server = cifs_ses_server(ses); + struct TCP_Server_Info *server = sess_data->server; mutex_lock(&server->srv_mutex); if (server->ops->generate_signingkey) { - rc = server->ops->generate_signingkey(ses); + rc = server->ops->generate_signingkey(ses, server); if (rc) { cifs_dbg(FYI, "SMB3 session key generation failed\n"); @@ -1387,16 +1382,12 @@ SMB2_sess_establish_session(struct SMB2_sess_data *sess_data) cifs_dbg(FYI, "SMB2/3 session established successfully\n"); spin_lock(&ses->chan_lock); - if (ses->binding) - cifs_chan_clear_need_reconnect(ses, ses->binding_chan->server); - else - cifs_chan_clear_need_reconnect(ses, ses->server); + cifs_chan_clear_need_reconnect(ses, server); spin_unlock(&ses->chan_lock); - /* keep existing ses state if binding */ + /* Even if one channel is active, session is in good state */ spin_lock(&GlobalMid_Lock); - if (!ses->binding) - ses->status = CifsGood; + ses->status = CifsGood; spin_unlock(&GlobalMid_Lock); return rc; @@ -1408,15 +1399,17 @@ SMB2_auth_kerberos(struct SMB2_sess_data *sess_data) { int rc; struct cifs_ses *ses = sess_data->ses; + struct TCP_Server_Info *server = sess_data->server; struct cifs_spnego_msg *msg; struct key *spnego_key = NULL; struct smb2_sess_setup_rsp *rsp = NULL; + bool is_binding = false; rc = SMB2_sess_alloc_buffer(sess_data); if (rc) goto out; - spnego_key = cifs_get_spnego_key(ses); + spnego_key = cifs_get_spnego_key(ses, server); if (IS_ERR(spnego_key)) { rc = PTR_ERR(spnego_key); if (rc == -ENOKEY) @@ -1437,8 +1430,12 @@ SMB2_auth_kerberos(struct SMB2_sess_data *sess_data) goto out_put_spnego_key; } + spin_lock(&ses->chan_lock); + is_binding = !CIFS_ALL_CHANS_NEED_RECONNECT(ses); + spin_unlock(&ses->chan_lock); + /* keep session key if binding */ - if (!ses->binding) { + if (!is_binding) { ses->auth_key.response = kmemdup(msg->data, msg->sesskey_len, GFP_KERNEL); if (!ses->auth_key.response) { @@ -1459,7 +1456,7 @@ SMB2_auth_kerberos(struct SMB2_sess_data *sess_data) rsp = (struct smb2_sess_setup_rsp *)sess_data->iov[0].iov_base; /* keep session id and flags if binding */ - if (!ses->binding) { + if (!is_binding) { ses->Suid = le64_to_cpu(rsp->hdr.SessionId); ses->session_flags = le16_to_cpu(rsp->SessionFlags); } @@ -1491,10 +1488,12 @@ SMB2_sess_auth_rawntlmssp_negotiate(struct SMB2_sess_data *sess_data) { int rc; struct cifs_ses *ses = sess_data->ses; + struct TCP_Server_Info *server = sess_data->server; struct smb2_sess_setup_rsp *rsp = NULL; unsigned char *ntlmssp_blob = NULL; bool use_spnego = false; /* else use raw ntlmssp */ u16 blob_length = 0; + bool is_binding = false; /* * If memory allocation is successful, caller of this function @@ -1512,7 +1511,7 @@ SMB2_sess_auth_rawntlmssp_negotiate(struct SMB2_sess_data *sess_data) goto out_err; rc = build_ntlmssp_negotiate_blob(&ntlmssp_blob, - &blob_length, ses, + &blob_length, ses, server, sess_data->nls_cp); if (rc) goto out_err; @@ -1551,8 +1550,12 @@ SMB2_sess_auth_rawntlmssp_negotiate(struct SMB2_sess_data *sess_data) cifs_dbg(FYI, "rawntlmssp session setup challenge phase\n"); + spin_lock(&ses->chan_lock); + is_binding = !CIFS_ALL_CHANS_NEED_RECONNECT(ses); + spin_unlock(&ses->chan_lock); + /* keep existing ses id and flags if binding */ - if (!ses->binding) { + if (!is_binding) { ses->Suid = le64_to_cpu(rsp->hdr.SessionId); ses->session_flags = le16_to_cpu(rsp->SessionFlags); } @@ -1577,11 +1580,13 @@ SMB2_sess_auth_rawntlmssp_authenticate(struct SMB2_sess_data *sess_data) { int rc; struct cifs_ses *ses = sess_data->ses; + struct TCP_Server_Info *server = sess_data->server; struct smb2_sess_setup_req *req; struct smb2_sess_setup_rsp *rsp = NULL; unsigned char *ntlmssp_blob = NULL; bool use_spnego = false; /* else use raw ntlmssp */ u16 blob_length = 0; + bool is_binding = false; rc = SMB2_sess_alloc_buffer(sess_data); if (rc) @@ -1590,8 +1595,9 @@ SMB2_sess_auth_rawntlmssp_authenticate(struct SMB2_sess_data *sess_data) req = (struct smb2_sess_setup_req *) sess_data->iov[0].iov_base; req->hdr.SessionId = cpu_to_le64(ses->Suid); - rc = build_ntlmssp_auth_blob(&ntlmssp_blob, &blob_length, ses, - sess_data->nls_cp); + rc = build_ntlmssp_auth_blob(&ntlmssp_blob, &blob_length, + ses, server, + sess_data->nls_cp); if (rc) { cifs_dbg(FYI, "build_ntlmssp_auth_blob failed %d\n", rc); goto out; @@ -1612,8 +1618,12 @@ SMB2_sess_auth_rawntlmssp_authenticate(struct SMB2_sess_data *sess_data) rsp = (struct smb2_sess_setup_rsp *)sess_data->iov[0].iov_base; + spin_lock(&ses->chan_lock); + is_binding = !CIFS_ALL_CHANS_NEED_RECONNECT(ses); + spin_unlock(&ses->chan_lock); + /* keep existing ses id and flags if binding */ - if (!ses->binding) { + if (!is_binding) { ses->Suid = le64_to_cpu(rsp->hdr.SessionId); ses->session_flags = le16_to_cpu(rsp->SessionFlags); } @@ -1644,11 +1654,13 @@ out: } static int -SMB2_select_sec(struct cifs_ses *ses, struct SMB2_sess_data *sess_data) +SMB2_select_sec(struct SMB2_sess_data *sess_data) { int type; + struct cifs_ses *ses = sess_data->ses; + struct TCP_Server_Info *server = sess_data->server; - type = smb2_select_sectype(cifs_ses_server(ses), ses->sectype); + type = smb2_select_sectype(server, ses->sectype); cifs_dbg(FYI, "sess setup type %d\n", type); if (type == Unspecified) { cifs_dbg(VFS, "Unable to select appropriate authentication method!\n"); @@ -1672,10 +1684,10 @@ SMB2_select_sec(struct cifs_ses *ses, struct SMB2_sess_data *sess_data) int SMB2_sess_setup(const unsigned int xid, struct cifs_ses *ses, + struct TCP_Server_Info *server, const struct nls_table *nls_cp) { int rc = 0; - struct TCP_Server_Info *server = cifs_ses_server(ses); struct SMB2_sess_data *sess_data; cifs_dbg(FYI, "Session Setup\n"); @@ -1689,15 +1701,17 @@ SMB2_sess_setup(const unsigned int xid, struct cifs_ses *ses, if (!sess_data) return -ENOMEM; - rc = SMB2_select_sec(ses, sess_data); - if (rc) - goto out; sess_data->xid = xid; sess_data->ses = ses; + sess_data->server = server; sess_data->buf0_type = CIFS_NO_BUFFER; sess_data->nls_cp = (struct nls_table *) nls_cp; sess_data->previous_session = ses->Suid; + rc = SMB2_select_sec(sess_data); + if (rc) + goto out; + /* * Initialize the session hash with the server one. */ diff --git a/fs/cifs/smb2proto.h b/fs/cifs/smb2proto.h index 096fada16ebd..4a7062fd1c26 100644 --- a/fs/cifs/smb2proto.h +++ b/fs/cifs/smb2proto.h @@ -123,8 +123,11 @@ extern void smb2_set_related(struct smb_rqst *rqst); * SMB2 Worker functions - most of protocol specific implementation details * are contained within these calls. */ -extern int SMB2_negotiate(const unsigned int xid, struct cifs_ses *ses); +extern int SMB2_negotiate(const unsigned int xid, + struct cifs_ses *ses, + struct TCP_Server_Info *server); extern int SMB2_sess_setup(const unsigned int xid, struct cifs_ses *ses, + struct TCP_Server_Info *server, const struct nls_table *nls_cp); extern int SMB2_logoff(const unsigned int xid, struct cifs_ses *ses); extern int SMB2_tcon(const unsigned int xid, struct cifs_ses *ses, @@ -276,6 +279,7 @@ extern void smb2_copy_fs_info_to_kstatfs( struct kstatfs *kst); extern int smb311_crypto_shash_allocate(struct TCP_Server_Info *server); extern int smb311_update_preauth_hash(struct cifs_ses *ses, + struct TCP_Server_Info *server, struct kvec *iov, int nvec); extern int smb2_query_info_compound(const unsigned int xid, struct cifs_tcon *tcon, diff --git a/fs/cifs/smb2transport.c b/fs/cifs/smb2transport.c index 2bf047b390a9..112adf153807 100644 --- a/fs/cifs/smb2transport.c +++ b/fs/cifs/smb2transport.c @@ -100,7 +100,8 @@ int smb2_get_sign_key(__u64 ses_id, struct TCP_Server_Info *server, u8 *key) goto out; found: - if (ses->binding) { + if (cifs_chan_needs_reconnect(ses, server) && + !CIFS_ALL_CHANS_NEED_RECONNECT(ses)) { /* * If we are in the process of binding a new channel * to an existing session, use the master connection @@ -390,12 +391,18 @@ struct derivation_triplet { static int generate_smb3signingkey(struct cifs_ses *ses, + struct TCP_Server_Info *server, const struct derivation_triplet *ptriplet) { int rc; -#ifdef CONFIG_CIFS_DEBUG_DUMP_KEYS - struct TCP_Server_Info *server = ses->server; -#endif + bool is_binding = false; + int chan_index = 0; + + spin_lock(&ses->chan_lock); + is_binding = !CIFS_ALL_CHANS_NEED_RECONNECT(ses); + chan_index = cifs_ses_get_chan_index(ses, server); + /* TODO: introduce ref counting for channels when the can be freed */ + spin_unlock(&ses->chan_lock); /* * All channels use the same encryption/decryption keys but @@ -407,10 +414,10 @@ generate_smb3signingkey(struct cifs_ses *ses, * master connection signing key stored in the session */ - if (ses->binding) { + if (is_binding) { rc = generate_key(ses, ptriplet->signing.label, ptriplet->signing.context, - cifs_ses_binding_channel(ses)->signkey, + ses->chans[chan_index].signkey, SMB3_SIGN_KEY_SIZE); if (rc) return rc; @@ -422,6 +429,7 @@ generate_smb3signingkey(struct cifs_ses *ses, if (rc) return rc; + /* safe to access primary channel, since it will never go away */ memcpy(ses->chans[0].signkey, ses->smb3signingkey, SMB3_SIGN_KEY_SIZE); @@ -470,7 +478,8 @@ generate_smb3signingkey(struct cifs_ses *ses, } int -generate_smb30signingkey(struct cifs_ses *ses) +generate_smb30signingkey(struct cifs_ses *ses, + struct TCP_Server_Info *server) { struct derivation_triplet triplet; @@ -494,11 +503,12 @@ generate_smb30signingkey(struct cifs_ses *ses) d->context.iov_base = "ServerOut"; d->context.iov_len = 10; - return generate_smb3signingkey(ses, &triplet); + return generate_smb3signingkey(ses, server, &triplet); } int -generate_smb311signingkey(struct cifs_ses *ses) +generate_smb311signingkey(struct cifs_ses *ses, + struct TCP_Server_Info *server) { struct derivation_triplet triplet; @@ -522,7 +532,7 @@ generate_smb311signingkey(struct cifs_ses *ses) d->context.iov_base = ses->preauth_sha_hash; d->context.iov_len = 64; - return generate_smb3signingkey(ses, &triplet); + return generate_smb3signingkey(ses, server, &triplet); } int diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c index 61ea3d3f95b4..1d81681d9b97 100644 --- a/fs/cifs/transport.c +++ b/fs/cifs/transport.c @@ -1045,18 +1045,19 @@ struct TCP_Server_Info *cifs_pick_channel(struct cifs_ses *ses) return NULL; spin_lock(&ses->chan_lock); - if (!ses->binding) { - /* round robin */ - if (ses->chan_count > 1) { - index = (uint)atomic_inc_return(&ses->chan_seq); - index %= ses->chan_count; - } - spin_unlock(&ses->chan_lock); - return ses->chans[index].server; - } else { - spin_unlock(&ses->chan_lock); - return cifs_ses_server(ses); + /* round robin */ +pick_another: + if (ses->chan_count > 1 && + !CIFS_ALL_CHANS_NEED_RECONNECT(ses)) { + index = (uint)atomic_inc_return(&ses->chan_seq); + index %= ses->chan_count; + + if (CIFS_CHAN_NEEDS_RECONNECT(ses, index)) + goto pick_another; } + spin_unlock(&ses->chan_lock); + + return ses->chans[index].server; } int @@ -1190,8 +1191,7 @@ compound_send_recv(const unsigned int xid, struct cifs_ses *ses, */ if ((ses->status == CifsNew) || (optype & CIFS_NEG_OP) || (optype & CIFS_SESS_OP)) { mutex_lock(&server->srv_mutex); - smb311_update_preauth_hash(ses, rqst[0].rq_iov, - rqst[0].rq_nvec); + smb311_update_preauth_hash(ses, server, rqst[0].rq_iov, rqst[0].rq_nvec); mutex_unlock(&server->srv_mutex); } @@ -1262,7 +1262,7 @@ compound_send_recv(const unsigned int xid, struct cifs_ses *ses, .iov_len = resp_iov[0].iov_len }; mutex_lock(&server->srv_mutex); - smb311_update_preauth_hash(ses, &iov, 1); + smb311_update_preauth_hash(ses, server, &iov, 1); mutex_unlock(&server->srv_mutex); } From 66eb0c6e66617cace0d626d48819bf2b5fbf9307 Mon Sep 17 00:00:00 2001 From: Shyam Prasad N Date: Mon, 19 Jul 2021 14:01:32 +0000 Subject: [PATCH 161/493] cifs: adjust DebugData to use chans_need_reconnect for conn status Use ses->chans_need_reconnect bitmask to print the connection status of each channel under an SMB session. Signed-off-by: Shyam Prasad N Signed-off-by: Steve French --- fs/cifs/cifs_debug.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/fs/cifs/cifs_debug.c b/fs/cifs/cifs_debug.c index d282caf9f037..ea00e1a91250 100644 --- a/fs/cifs/cifs_debug.c +++ b/fs/cifs/cifs_debug.c @@ -416,11 +416,17 @@ skip_rdma: from_kuid(&init_user_ns, ses->cred_uid)); spin_lock(&ses->chan_lock); + if (CIFS_CHAN_NEEDS_RECONNECT(ses, 0)) + seq_puts(m, "\tPrimary channel: DISCONNECTED "); + if (ses->chan_count > 1) { seq_printf(m, "\n\n\tExtra Channels: %zu ", ses->chan_count-1); - for (j = 1; j < ses->chan_count; j++) + for (j = 1; j < ses->chan_count; j++) { cifs_dump_channel(m, j, &ses->chans[j]); + if (CIFS_CHAN_NEEDS_RECONNECT(ses, j)) + seq_puts(m, "\tDISCONNECTED "); + } } spin_unlock(&ses->chan_lock); From 2e0fa298d149e07005504350358066f380f72b52 Mon Sep 17 00:00:00 2001 From: Shyam Prasad N Date: Mon, 19 Jul 2021 14:04:11 +0000 Subject: [PATCH 162/493] cifs: add WARN_ON for when chan_count goes below minimum chan_count keeps track of the total number of channels. Since at least the primary channel will always be connected, this value can never go below 1. Warn if that happens. Signed-off-by: Shyam Prasad N Signed-off-by: Steve French --- fs/cifs/sess.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c index b121a2591e69..61fc8cb1ec8f 100644 --- a/fs/cifs/sess.c +++ b/fs/cifs/sess.c @@ -347,6 +347,11 @@ out: /* we rely on all bits beyond chan_count to be clear */ cifs_chan_clear_need_reconnect(ses, chan->server); ses->chan_count--; + /* + * chan_count should never reach 0 as at least the primary + * channel is always allocated + */ + WARN_ON(ses->chan_count < 1); spin_unlock(&ses->chan_lock); } From 183eea2ee5ba968ca7c31f04a0f01fd3e5c1d014 Mon Sep 17 00:00:00 2001 From: Shyam Prasad N Date: Mon, 19 Jul 2021 14:14:46 +0000 Subject: [PATCH 163/493] cifs: reconnect only the connection and not smb session where possible With the new per-channel bitmask for reconnect, we have an option to reconnect the tcp session associated with the channel without reconnecting the smb session. i.e. if there are still channels to operate on, we can continue to use the smb session and tcon. However, there are cases where it makes sense to reconnect the smb session even when there are active channels underneath. For example for SMB session expiry. With this patch, we'll have an option to do either, and use the correct option for specific cases. Signed-off-by: Shyam Prasad N Signed-off-by: Steve French --- fs/cifs/cifsglob.h | 2 ++ fs/cifs/cifsproto.h | 3 +- fs/cifs/cifssmb.c | 2 +- fs/cifs/connect.c | 74 +++++++++++++++++++++++++++------------------ fs/cifs/smb2ops.c | 6 ++-- 5 files changed, 52 insertions(+), 35 deletions(-) diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index 76b4adc7d738..23d76ae713f0 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h @@ -973,6 +973,8 @@ struct cifs_ses { ((1UL << (ses)->chan_count) - 1) #define CIFS_ALL_CHANS_NEED_RECONNECT(ses) \ ((ses)->chans_need_reconnect == CIFS_ALL_CHANNELS_SET(ses)) +#define CIFS_SET_ALL_CHANS_NEED_RECONNECT(ses) \ + ((ses)->chans_need_reconnect = CIFS_ALL_CHANNELS_SET(ses)) #define CIFS_CHAN_NEEDS_RECONNECT(ses, index) \ test_bit((index), &(ses)->chans_need_reconnect) diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h index 09356a9abe9c..e0dc147e69a8 100644 --- a/fs/cifs/cifsproto.h +++ b/fs/cifs/cifsproto.h @@ -131,7 +131,8 @@ extern int SendReceiveBlockingLock(const unsigned int xid, struct smb_hdr *in_buf , struct smb_hdr *out_buf, int *bytes_returned); -extern int cifs_reconnect(struct TCP_Server_Info *server); +extern int cifs_reconnect(struct TCP_Server_Info *server, + bool mark_smb_session); extern int checkSMB(char *buf, unsigned int len, struct TCP_Server_Info *srvr); extern bool is_valid_oplock_break(char *, struct TCP_Server_Info *); extern bool backup_cred(struct cifs_sb_info *); diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c index 8b3e5be483bc..7b1d0d71f3f1 100644 --- a/fs/cifs/cifssmb.c +++ b/fs/cifs/cifssmb.c @@ -1439,7 +1439,7 @@ cifs_readv_receive(struct TCP_Server_Info *server, struct mid_q_entry *mid) if (server->ops->is_session_expired && server->ops->is_session_expired(buf)) { - cifs_reconnect(server); + cifs_reconnect(server, true); return -1; } diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 9ee5856d63cc..7b478f5db9d6 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -166,8 +166,11 @@ static void cifs_resolve_server(struct work_struct *work) * Mark all sessions and tcons for reconnect. * * @server needs to be previously set to CifsNeedReconnect. + * */ -static void cifs_mark_tcp_ses_conns_for_reconnect(struct TCP_Server_Info *server) +static void +cifs_mark_tcp_ses_conns_for_reconnect(struct TCP_Server_Info *server, + bool mark_smb_session) { unsigned int num_sessions = 0; struct cifs_ses *ses; @@ -193,13 +196,16 @@ static void cifs_mark_tcp_ses_conns_for_reconnect(struct TCP_Server_Info *server spin_lock(&cifs_tcp_ses_lock); list_for_each_entry(ses, &pserver->smb_ses_list, smb_ses_list) { spin_lock(&ses->chan_lock); - if (cifs_chan_needs_reconnect(ses, server)) + if (!mark_smb_session && cifs_chan_needs_reconnect(ses, server)) goto next_session; - cifs_chan_set_need_reconnect(ses, server); + if (mark_smb_session) + CIFS_SET_ALL_CHANS_NEED_RECONNECT(ses); + else + cifs_chan_set_need_reconnect(ses, server); /* If all channels need reconnect, then tcon needs reconnect */ - if (!CIFS_ALL_CHANS_NEED_RECONNECT(ses)) + if (!mark_smb_session && !CIFS_ALL_CHANS_NEED_RECONNECT(ses)) goto next_session; num_sessions++; @@ -271,16 +277,16 @@ next_session: static bool cifs_tcp_ses_needs_reconnect(struct TCP_Server_Info *server, int num_targets) { - spin_lock(&GlobalMid_Lock); + spin_lock(&cifs_tcp_ses_lock); server->nr_targets = num_targets; if (server->tcpStatus == CifsExiting) { /* the demux thread will exit normally next time through the loop */ - spin_unlock(&GlobalMid_Lock); + spin_unlock(&cifs_tcp_ses_lock); wake_up(&server->response_q); return false; } server->tcpStatus = CifsNeedReconnect; - spin_unlock(&GlobalMid_Lock); + spin_unlock(&cifs_tcp_ses_lock); return true; } @@ -291,15 +297,21 @@ static bool cifs_tcp_ses_needs_reconnect(struct TCP_Server_Info *server, int num * mark all smb sessions as reconnecting for tcp session * reconnect tcp session * wake up waiters on reconnection? - (not needed currently) + * + * if mark_smb_session is passed as true, unconditionally mark + * the smb session (and tcon) for reconnect as well. This value + * doesn't really matter for non-multichannel scenario. + * */ -static int __cifs_reconnect(struct TCP_Server_Info *server) +static int __cifs_reconnect(struct TCP_Server_Info *server, + bool mark_smb_session) { int rc = 0; if (!cifs_tcp_ses_needs_reconnect(server, 1)) return 0; - cifs_mark_tcp_ses_conns_for_reconnect(server); + cifs_mark_tcp_ses_conns_for_reconnect(server, mark_smb_session); do { try_to_freeze(); @@ -322,10 +334,10 @@ static int __cifs_reconnect(struct TCP_Server_Info *server) } else { atomic_inc(&tcpSesReconnectCount); set_credits(server, 1); - spin_lock(&GlobalMid_Lock); + spin_lock(&cifs_tcp_ses_lock); if (server->tcpStatus != CifsExiting) server->tcpStatus = CifsNeedNegotiate; - spin_unlock(&GlobalMid_Lock); + spin_unlock(&cifs_tcp_ses_lock); cifs_swn_reset_server_dstaddr(server); mutex_unlock(&server->srv_mutex); } @@ -394,7 +406,9 @@ static int reconnect_target_unlocked(struct TCP_Server_Info *server, struct dfs_ return rc; } -static int reconnect_dfs_server(struct TCP_Server_Info *server) +static int +reconnect_dfs_server(struct TCP_Server_Info *server, + bool mark_smb_session) { int rc = 0; const char *refpath = server->current_fullpath + 1; @@ -418,7 +432,7 @@ static int reconnect_dfs_server(struct TCP_Server_Info *server) if (!cifs_tcp_ses_needs_reconnect(server, num_targets)) return 0; - cifs_mark_tcp_ses_conns_for_reconnect(server); + cifs_mark_tcp_ses_conns_for_reconnect(server, mark_smb_session); do { try_to_freeze(); @@ -439,10 +453,10 @@ static int reconnect_dfs_server(struct TCP_Server_Info *server) */ atomic_inc(&tcpSesReconnectCount); set_credits(server, 1); - spin_lock(&GlobalMid_Lock); + spin_lock(&cifs_tcp_ses_lock); if (server->tcpStatus != CifsExiting) server->tcpStatus = CifsNeedNegotiate; - spin_unlock(&GlobalMid_Lock); + spin_unlock(&cifs_tcp_ses_lock); cifs_swn_reset_server_dstaddr(server); mutex_unlock(&server->srv_mutex); } while (server->tcpStatus == CifsNeedReconnect); @@ -460,22 +474,22 @@ static int reconnect_dfs_server(struct TCP_Server_Info *server) return rc; } -int cifs_reconnect(struct TCP_Server_Info *server) +int cifs_reconnect(struct TCP_Server_Info *server, bool mark_smb_session) { /* If tcp session is not an dfs connection, then reconnect to last target server */ spin_lock(&cifs_tcp_ses_lock); if (!server->is_dfs_conn || !server->origin_fullpath || !server->leaf_fullpath) { spin_unlock(&cifs_tcp_ses_lock); - return __cifs_reconnect(server); + return __cifs_reconnect(server, mark_smb_session); } spin_unlock(&cifs_tcp_ses_lock); - return reconnect_dfs_server(server); + return reconnect_dfs_server(server, mark_smb_session); } #else -int cifs_reconnect(struct TCP_Server_Info *server) +int cifs_reconnect(struct TCP_Server_Info *server, bool mark_smb_session) { - return __cifs_reconnect(server); + return __cifs_reconnect(server, mark_smb_session); } #endif @@ -563,7 +577,7 @@ server_unresponsive(struct TCP_Server_Info *server) time_after(jiffies, server->lstrp + 3 * server->echo_interval)) { cifs_server_dbg(VFS, "has not responded in %lu seconds. Reconnecting...\n", (3 * server->echo_interval) / HZ); - cifs_reconnect(server); + cifs_reconnect(server, false); return true; } @@ -599,7 +613,7 @@ cifs_readv_from_socket(struct TCP_Server_Info *server, struct msghdr *smb_msg) /* reconnect if no credits and no requests in flight */ if (zero_credits(server)) { - cifs_reconnect(server); + cifs_reconnect(server, false); return -ECONNABORTED; } @@ -614,7 +628,7 @@ cifs_readv_from_socket(struct TCP_Server_Info *server, struct msghdr *smb_msg) return -ESHUTDOWN; if (server->tcpStatus == CifsNeedReconnect) { - cifs_reconnect(server); + cifs_reconnect(server, false); return -ECONNABORTED; } @@ -633,7 +647,7 @@ cifs_readv_from_socket(struct TCP_Server_Info *server, struct msghdr *smb_msg) if (length <= 0) { cifs_dbg(FYI, "Received no data or error: %d\n", length); - cifs_reconnect(server); + cifs_reconnect(server, false); return -ECONNABORTED; } } @@ -712,11 +726,11 @@ is_smb_response(struct TCP_Server_Info *server, unsigned char type) * initialize frame). */ cifs_set_port((struct sockaddr *)&server->dstaddr, CIFS_PORT); - cifs_reconnect(server); + cifs_reconnect(server, true); break; default: cifs_server_dbg(VFS, "RFC 1002 unknown response type 0x%x\n", type); - cifs_reconnect(server); + cifs_reconnect(server, true); } return false; @@ -889,7 +903,7 @@ standard_receive3(struct TCP_Server_Info *server, struct mid_q_entry *mid) if (pdu_length > CIFSMaxBufSize + MAX_HEADER_SIZE(server) - server->vals->header_preamble_size) { cifs_server_dbg(VFS, "SMB response too long (%u bytes)\n", pdu_length); - cifs_reconnect(server); + cifs_reconnect(server, true); return -ECONNABORTED; } @@ -936,7 +950,7 @@ cifs_handle_standard(struct TCP_Server_Info *server, struct mid_q_entry *mid) if (server->ops->is_session_expired && server->ops->is_session_expired(buf)) { - cifs_reconnect(server); + cifs_reconnect(server, true); return -1; } @@ -1040,7 +1054,7 @@ next_pdu: server->vals->header_preamble_size) { cifs_server_dbg(VFS, "SMB response too short (%u bytes)\n", server->pdu_size); - cifs_reconnect(server); + cifs_reconnect(server, true); continue; } @@ -1092,7 +1106,7 @@ next_pdu: server->ops->is_status_io_timeout(buf)) { num_io_timeout++; if (num_io_timeout > NUM_STATUS_IO_TIMEOUT) { - cifs_reconnect(server); + cifs_reconnect(server, false); num_io_timeout = 0; continue; } diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c index c97dd9758c69..b33b0f391a23 100644 --- a/fs/cifs/smb2ops.c +++ b/fs/cifs/smb2ops.c @@ -4810,7 +4810,7 @@ handle_read_data(struct TCP_Server_Info *server, struct mid_q_entry *mid, if (server->ops->is_session_expired && server->ops->is_session_expired(buf)) { if (!is_offloaded) - cifs_reconnect(server); + cifs_reconnect(server, true); return -1; } @@ -5223,13 +5223,13 @@ smb3_receive_transform(struct TCP_Server_Info *server, sizeof(struct smb2_hdr)) { cifs_server_dbg(VFS, "Transform message is too small (%u)\n", pdu_length); - cifs_reconnect(server); + cifs_reconnect(server, true); return -ECONNABORTED; } if (pdu_length < orig_len + sizeof(struct smb2_transform_hdr)) { cifs_server_dbg(VFS, "Transform message is broken\n"); - cifs_reconnect(server); + cifs_reconnect(server, true); return -ECONNABORTED; } From 401c151164f20f71b5d6a0bc8ff6abe7e180a535 Mon Sep 17 00:00:00 2001 From: Yoshihiro Shimoda Date: Wed, 22 Dec 2021 20:45:06 +0900 Subject: [PATCH 164/493] dt-bindings: renesas,rcar-dmac: Add r8a779f0 support Document the compatible value for the Direct Memory Access Controller blocks in the Renesas R-Car S4-8 (R8A779F0) SoC. The most visible difference with DMAC blocks on other R-Car SoCs (except R8A779A0) is the move of the per-channel registers to a separate register block. Signed-off-by: Yoshihiro Shimoda Reviewed-by: Geert Uytterhoeven Link: https://lore.kernel.org/r/20211222114507.1252947-2-yoshihiro.shimoda.uh@renesas.com Signed-off-by: Vinod Koul --- Documentation/devicetree/bindings/dma/renesas,rcar-dmac.yaml | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/Documentation/devicetree/bindings/dma/renesas,rcar-dmac.yaml b/Documentation/devicetree/bindings/dma/renesas,rcar-dmac.yaml index d8142cbd13d3..7c6badf39921 100644 --- a/Documentation/devicetree/bindings/dma/renesas,rcar-dmac.yaml +++ b/Documentation/devicetree/bindings/dma/renesas,rcar-dmac.yaml @@ -44,6 +44,10 @@ properties: - items: - const: renesas,dmac-r8a779a0 # R-Car V3U + - items: + - const: renesas,dmac-r8a779f0 # R-Car S4-8 + - const: renesas,rcar-gen4-dmac + reg: true interrupts: @@ -118,6 +122,7 @@ if: contains: enum: - renesas,dmac-r8a779a0 + - renesas,rcar-gen4-dmac then: properties: reg: From 2fe6777b8d4967f88d00c315dda11db180262811 Mon Sep 17 00:00:00 2001 From: Yoshihiro Shimoda Date: Wed, 22 Dec 2021 20:45:07 +0900 Subject: [PATCH 165/493] dmaengine: rcar-dmac: Add support for R-Car S4-8 Add support for R-Car S4-8. We can reuse R-Car V3U code so that renames variable names as "gen4". Note that some registers of R-Car V3U do not exist on R-Car S4-8, but none of them are used by the driver for now. Signed-off-by: Yoshihiro Shimoda Reviewed-by: Ulrich Hecht Reviewed-by: Geert Uytterhoeven Link: https://lore.kernel.org/r/20211222114507.1252947-3-yoshihiro.shimoda.uh@renesas.com Signed-off-by: Vinod Koul --- drivers/dma/sh/rcar-dmac.c | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/drivers/dma/sh/rcar-dmac.c b/drivers/dma/sh/rcar-dmac.c index 5c7716fd6bc5..481f45c77ce1 100644 --- a/drivers/dma/sh/rcar-dmac.c +++ b/drivers/dma/sh/rcar-dmac.c @@ -236,7 +236,7 @@ struct rcar_dmac_of_data { #define RCAR_DMAOR_PRI_ROUND_ROBIN (3 << 8) #define RCAR_DMAOR_AE (1 << 2) #define RCAR_DMAOR_DME (1 << 0) -#define RCAR_DMACHCLR 0x0080 /* Not on R-Car V3U */ +#define RCAR_DMACHCLR 0x0080 /* Not on R-Car Gen4 */ #define RCAR_DMADPSEC 0x00a0 #define RCAR_DMASAR 0x0000 @@ -299,8 +299,8 @@ struct rcar_dmac_of_data { #define RCAR_DMAFIXDAR 0x0014 #define RCAR_DMAFIXDPBASE 0x0060 -/* For R-Car V3U */ -#define RCAR_V3U_DMACHCLR 0x0100 +/* For R-Car Gen4 */ +#define RCAR_GEN4_DMACHCLR 0x0100 /* Hardcode the MEMCPY transfer size to 4 bytes. */ #define RCAR_DMAC_MEMCPY_XFER_SIZE 4 @@ -345,7 +345,7 @@ static void rcar_dmac_chan_clear(struct rcar_dmac *dmac, struct rcar_dmac_chan *chan) { if (dmac->chan_base) - rcar_dmac_chan_write(chan, RCAR_V3U_DMACHCLR, 1); + rcar_dmac_chan_write(chan, RCAR_GEN4_DMACHCLR, 1); else rcar_dmac_write(dmac, RCAR_DMACHCLR, BIT(chan->index)); } @@ -357,7 +357,7 @@ static void rcar_dmac_chan_clear_all(struct rcar_dmac *dmac) if (dmac->chan_base) { for_each_rcar_dmac_chan(i, dmac, chan) - rcar_dmac_chan_write(chan, RCAR_V3U_DMACHCLR, 1); + rcar_dmac_chan_write(chan, RCAR_GEN4_DMACHCLR, 1); } else { rcar_dmac_write(dmac, RCAR_DMACHCLR, dmac->channels_mask); } @@ -2009,7 +2009,7 @@ static const struct rcar_dmac_of_data rcar_dmac_data = { .chan_offset_stride = 0x80, }; -static const struct rcar_dmac_of_data rcar_v3u_dmac_data = { +static const struct rcar_dmac_of_data rcar_gen4_dmac_data = { .chan_offset_base = 0x0, .chan_offset_stride = 0x1000, }; @@ -2018,9 +2018,12 @@ static const struct of_device_id rcar_dmac_of_ids[] = { { .compatible = "renesas,rcar-dmac", .data = &rcar_dmac_data, + }, { + .compatible = "renesas,rcar-gen4-dmac", + .data = &rcar_gen4_dmac_data, }, { .compatible = "renesas,dmac-r8a779a0", - .data = &rcar_v3u_dmac_data, + .data = &rcar_gen4_dmac_data, }, { /* Sentinel */ } }; From e7f110889a87307fb0fed408a5dee1707796ca04 Mon Sep 17 00:00:00 2001 From: Amelie Delaunay Date: Mon, 20 Dec 2021 17:58:27 +0100 Subject: [PATCH 166/493] dmaengine: stm32-mdma: fix STM32_MDMA_CTBR_TSEL_MASK This patch fixes STM32_MDMA_CTBR_TSEL_MASK, which is [5:0], not [7:0]. Fixes: a4ffb13c8946 ("dmaengine: Add STM32 MDMA driver") Signed-off-by: Amelie Delaunay Link: https://lore.kernel.org/r/20211220165827.1238097-1-amelie.delaunay@foss.st.com Signed-off-by: Vinod Koul --- drivers/dma/stm32-mdma.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/dma/stm32-mdma.c b/drivers/dma/stm32-mdma.c index 76cf2e333e63..6f57ff0e7b37 100644 --- a/drivers/dma/stm32-mdma.c +++ b/drivers/dma/stm32-mdma.c @@ -157,7 +157,7 @@ #define STM32_MDMA_CTBR(x) (0x68 + 0x40 * (x)) #define STM32_MDMA_CTBR_DBUS BIT(17) #define STM32_MDMA_CTBR_SBUS BIT(16) -#define STM32_MDMA_CTBR_TSEL_MASK GENMASK(7, 0) +#define STM32_MDMA_CTBR_TSEL_MASK GENMASK(5, 0) #define STM32_MDMA_CTBR_TSEL(n) FIELD_PREP(STM32_MDMA_CTBR_TSEL_MASK, (n)) /* MDMA Channel x mask address register */ From 29f306340fa896e1edd9473cd7956f34c52f40ab Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Fri, 17 Dec 2021 11:06:43 -0600 Subject: [PATCH 167/493] dt-bindings: dma: pl330: Convert to DT schema Convert the Arm PL330 DMA controller binding to DT schema. The '#dma-channels' and '#dma-requests' properties are unused as they are discoverable and are non-standard (the standard props don't have '#'). So drop them from the binding. Cc: Vinod Koul Cc: dmaengine@vger.kernel.org Signed-off-by: Rob Herring Link: https://lore.kernel.org/r/20211217170644.3145332-1-robh@kernel.org Signed-off-by: Vinod Koul --- .../devicetree/bindings/dma/arm,pl330.yaml | 83 +++++++++++++++++++ .../devicetree/bindings/dma/arm-pl330.txt | 49 ----------- 2 files changed, 83 insertions(+), 49 deletions(-) create mode 100644 Documentation/devicetree/bindings/dma/arm,pl330.yaml delete mode 100644 Documentation/devicetree/bindings/dma/arm-pl330.txt diff --git a/Documentation/devicetree/bindings/dma/arm,pl330.yaml b/Documentation/devicetree/bindings/dma/arm,pl330.yaml new file mode 100644 index 000000000000..decab185cf4d --- /dev/null +++ b/Documentation/devicetree/bindings/dma/arm,pl330.yaml @@ -0,0 +1,83 @@ +# SPDX-License-Identifier: GPL-2.0 +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/dma/arm,pl330.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: ARM PrimeCell PL330 DMA Controller + +maintainers: + - Vinod Koul + +description: + The ARM PrimeCell PL330 DMA controller can move blocks of memory contents + between memory and peripherals or memory to memory. + +# We need a select here so we don't match all nodes with 'arm,primecell' +select: + properties: + compatible: + contains: + const: arm,pl330 + required: + - compatible + +allOf: + - $ref: dma-controller.yaml# + - $ref: /schemas/arm/primecell.yaml# + +properties: + compatible: + items: + - enum: + - arm,pl330 + - const: arm,primecell + + reg: + maxItems: 1 + + interrupts: + minItems: 1 + maxItems: 32 + description: A single combined interrupt or an interrupt per event + + '#dma-cells': + const: 1 + description: Contains the DMA request number for the consumer + + arm,pl330-broken-no-flushp: + type: boolean + description: quirk for avoiding to execute DMAFLUSHP + + arm,pl330-periph-burst: + type: boolean + description: quirk for performing burst transfer only + + dma-coherent: true + + resets: + minItems: 1 + maxItems: 2 + + reset-names: + minItems: 1 + items: + - const: dma + - const: dma-ocp + +required: + - compatible + - reg + - interrupts + +unevaluatedProperties: false + +examples: + - | + dma-controller@12680000 { + compatible = "arm,pl330", "arm,primecell"; + reg = <0x12680000 0x1000>; + interrupts = <99>; + #dma-cells = <1>; + }; +... diff --git a/Documentation/devicetree/bindings/dma/arm-pl330.txt b/Documentation/devicetree/bindings/dma/arm-pl330.txt deleted file mode 100644 index 315e90122afa..000000000000 --- a/Documentation/devicetree/bindings/dma/arm-pl330.txt +++ /dev/null @@ -1,49 +0,0 @@ -* ARM PrimeCell PL330 DMA Controller - -The ARM PrimeCell PL330 DMA controller can move blocks of memory contents -between memory and peripherals or memory to memory. - -Required properties: - - compatible: should include both "arm,pl330" and "arm,primecell". - - reg: physical base address of the controller and length of memory mapped - region. - - interrupts: interrupt number to the cpu. - -Optional properties: - - dma-coherent : Present if dma operations are coherent - - #dma-cells: must be <1>. used to represent the number of integer - cells in the dmas property of client device. - - dma-channels: contains the total number of DMA channels supported by the DMAC - - dma-requests: contains the total number of DMA requests supported by the DMAC - - arm,pl330-broken-no-flushp: quirk for avoiding to execute DMAFLUSHP - - arm,pl330-periph-burst: quirk for performing burst transfer only - - resets: contains an entry for each entry in reset-names. - See ../reset/reset.txt for details. - - reset-names: must contain at least "dma", and optional is "dma-ocp". - -Example: - - pdma0: pdma@12680000 { - compatible = "arm,pl330", "arm,primecell"; - reg = <0x12680000 0x1000>; - interrupts = <99>; - #dma-cells = <1>; - #dma-channels = <8>; - #dma-requests = <32>; - }; - -Client drivers (device nodes requiring dma transfers from dev-to-mem or -mem-to-dev) should specify the DMA channel numbers and dma channel names -as shown below. - - [property name] = <[phandle of the dma controller] [dma request id]>; - [property name] = <[dma channel name]> - - where 'dma request id' is the dma request number which is connected - to the client controller. The 'property name' 'dmas' and 'dma-names' - as required by the generic dma device tree binding helpers. The dma - names correspond 1:1 with the dma request ids in the dmas property. - - Example: dmas = <&pdma0 12 - &pdma1 11>; - dma-names = "tx", "rx"; From e66d70c034dbdfe1a48863f0865ac86aaf2fef1a Mon Sep 17 00:00:00 2001 From: Vinod Koul Date: Mon, 13 Dec 2021 10:51:41 +0530 Subject: [PATCH 168/493] dmaengine: xilinx_dpdma: use correct SDPX tag for header file Commit 188c310bdd5d ("dmaengine: xilinx_dpdma: stop using slave_id field") add the header file with incorrect format for SPDX tag, fix that WARNING: Improper SPDX comment style for 'include/linux/dma/xilinx_dpdma.h', please use '/*' instead #1: FILE: include/linux/dma/xilinx_dpdma.h:1: +// SPDX-License-Identifier: GPL-2.0 WARNING: Missing or malformed SPDX-License-Identifier tag in line 1 #1: FILE: include/linux/dma/xilinx_dpdma.h:1: +// SPDX-License-Identifier: GPL-2.0 Fixes: 188c310bdd5d ("dmaengine: xilinx_dpdma: stop using slave_id field") Signed-off-by: Vinod Koul Link: https://lore.kernel.org/r/20211213052141.850807-1-vkoul@kernel.org Signed-off-by: Vinod Koul --- include/linux/dma/xilinx_dpdma.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/dma/xilinx_dpdma.h b/include/linux/dma/xilinx_dpdma.h index 83a1377f03f8..02a4adf8921b 100644 --- a/include/linux/dma/xilinx_dpdma.h +++ b/include/linux/dma/xilinx_dpdma.h @@ -1,4 +1,4 @@ -// SPDX-License-Identifier: GPL-2.0 +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __LINUX_DMA_XILINX_DPDMA_H #define __LINUX_DMA_XILINX_DPDMA_H From 105a8c525675bb7d4d64871f9b2edf39460de881 Mon Sep 17 00:00:00 2001 From: Kunihiko Hayashi Date: Tue, 14 Dec 2021 13:42:43 +0900 Subject: [PATCH 169/493] dmaengine: uniphier-xdmac: Fix type of address variables The variables src_addr and dst_addr handle DMA addresses, so these should be declared as dma_addr_t. Fixes: 667b9251440b ("dmaengine: uniphier-xdmac: Add UniPhier external DMA controller driver") Signed-off-by: Kunihiko Hayashi Link: https://lore.kernel.org/r/1639456963-10232-1-git-send-email-hayashi.kunihiko@socionext.com Signed-off-by: Vinod Koul --- drivers/dma/uniphier-xdmac.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/dma/uniphier-xdmac.c b/drivers/dma/uniphier-xdmac.c index d6b8a202474f..290836b7e1be 100644 --- a/drivers/dma/uniphier-xdmac.c +++ b/drivers/dma/uniphier-xdmac.c @@ -131,8 +131,9 @@ uniphier_xdmac_next_desc(struct uniphier_xdmac_chan *xc) static void uniphier_xdmac_chan_start(struct uniphier_xdmac_chan *xc, struct uniphier_xdmac_desc *xd) { - u32 src_mode, src_addr, src_width; - u32 dst_mode, dst_addr, dst_width; + u32 src_mode, src_width; + u32 dst_mode, dst_width; + dma_addr_t src_addr, dst_addr; u32 val, its, tnum; enum dma_slave_buswidth buswidth; From cfcabbb24d5f4e52ce2e7797cbcfacd8fe932fb6 Mon Sep 17 00:00:00 2001 From: Arnaud Pouliquen Date: Tue, 21 Dec 2021 15:31:29 +0100 Subject: [PATCH 170/493] remoteproc: stm32: Improve crash recovery time When a stop is requested on a crash, it is useless to try to shutdown it gracefully, it is crashed. In this case don't send the STM32_MBX_SHUTDOWN mailbox message that will block the recovery during 500 ms, waiting an answer from the coprocessor. Signed-off-by: Arnaud Pouliquen Link: https://lore.kernel.org/r/20211221143129.18415-1-arnaud.pouliquen@foss.st.com Signed-off-by: Mathieu Poirier --- drivers/remoteproc/stm32_rproc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/remoteproc/stm32_rproc.c b/drivers/remoteproc/stm32_rproc.c index b643efcf995a..7d782ed9e589 100644 --- a/drivers/remoteproc/stm32_rproc.c +++ b/drivers/remoteproc/stm32_rproc.c @@ -494,7 +494,7 @@ static int stm32_rproc_stop(struct rproc *rproc) int err, idx; /* request shutdown of the remote processor */ - if (rproc->state != RPROC_OFFLINE) { + if (rproc->state != RPROC_OFFLINE && rproc->state != RPROC_CRASHED) { idx = stm32_rproc_mbox_idx(rproc, STM32_MBX_SHUTDOWN); if (idx >= 0 && ddata->mb[idx].chan) { err = mbox_send_message(ddata->mb[idx].chan, "detach"); From 49737f261c412ea7c4ca583317a9b094c0aaed49 Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Sat, 13 Nov 2021 14:26:14 +0800 Subject: [PATCH 171/493] ata: pata_ali: no need to initialise statics to 0 Static variables do not need to be initialized to 0. Signed-off-by: Jason Wang Signed-off-by: Damien Le Moal --- drivers/ata/pata_ali.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/ata/pata_ali.c b/drivers/ata/pata_ali.c index b7ff63ed3bbb..ab28a6707b94 100644 --- a/drivers/ata/pata_ali.c +++ b/drivers/ata/pata_ali.c @@ -37,7 +37,7 @@ #define DRV_NAME "pata_ali" #define DRV_VERSION "0.7.8" -static int ali_atapi_dma = 0; +static int ali_atapi_dma; module_param_named(atapi_dma, ali_atapi_dma, int, 0644); MODULE_PARM_DESC(atapi_dma, "Enable ATAPI DMA (0=disable, 1=enable)"); From 08a6df09063818d55c1f0aa8cf3385d8d217b506 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Mon, 3 Jan 2022 21:35:37 -0800 Subject: [PATCH 172/493] Input: gpio-keys - avoid clearing twice some memory bitmap_parselist() already clears the 'bits' bitmap, so there is no need to clear it when it is allocated. This just wastes some cycles. Signed-off-by: Christophe JAILLET Acked-by: Paul Cercueil Link: https://lore.kernel.org/r/d6ee621b9dd75b92f8831db365cee58dc2025322.1640813136.git.christophe.jaillet@wanadoo.fr Signed-off-by: Dmitry Torokhov --- drivers/input/keyboard/gpio_keys.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/input/keyboard/gpio_keys.c b/drivers/input/keyboard/gpio_keys.c index 8dbf1e69c90a..d75a8b179a8a 100644 --- a/drivers/input/keyboard/gpio_keys.c +++ b/drivers/input/keyboard/gpio_keys.c @@ -247,7 +247,7 @@ static ssize_t gpio_keys_attr_store_helper(struct gpio_keys_drvdata *ddata, ssize_t error; int i; - bits = bitmap_zalloc(n_events, GFP_KERNEL); + bits = bitmap_alloc(n_events, GFP_KERNEL); if (!bits) return -ENOMEM; From 23c72ffedeed6d513144fa09834b1eb0cb2b7373 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Thu, 18 Nov 2021 10:38:07 -0800 Subject: [PATCH 173/493] ata: sata_fsl: Use struct_group() for memcpy() region In preparation for FORTIFY_SOURCE performing compile-time and run-time field bounds checking for memcpy(), memmove(), and memset(), avoid intentionally writing across neighboring fields. Use struct_group() in struct command_desc around members acmd and fill, so they can be referenced together. This will allow memset(), memcpy(), and sizeof() to more easily reason about sizes, improve readability, and avoid future warnings about writing beyond the end of acmd: In function 'fortify_memset_chk', inlined from 'sata_fsl_qc_prep' at drivers/ata/sata_fsl.c:534:3: ./include/linux/fortify-string.h:199:4: warning: call to '__write_overflow_field' declared with attribute warning: detected write beyond size of field (1st parameter); maybe use struct_group()? [-Wattribute-warning] 199 | __write_overflow_field(); | ^~~~~~~~~~~~~~~~~~~~~~~~ Signed-off-by: Kees Cook Signed-off-by: Damien Le Moal --- drivers/ata/sata_fsl.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/ata/sata_fsl.c b/drivers/ata/sata_fsl.c index 3b31a4f596d8..c5a2c1e9ed6b 100644 --- a/drivers/ata/sata_fsl.c +++ b/drivers/ata/sata_fsl.c @@ -246,8 +246,10 @@ enum { struct command_desc { u8 cfis[8 * 4]; u8 sfis[8 * 4]; - u8 acmd[4 * 4]; - u8 fill[4 * 4]; + struct_group(cdb, + u8 acmd[4 * 4]; + u8 fill[4 * 4]; + ); u32 prdt[SATA_FSL_MAX_PRD_DIRECT * 4]; u32 prdt_indirect[(SATA_FSL_MAX_PRD - SATA_FSL_MAX_PRD_DIRECT) * 4]; }; @@ -531,8 +533,8 @@ static enum ata_completion_errors sata_fsl_qc_prep(struct ata_queued_cmd *qc) /* setup "ACMD - atapi command" in cmd. desc. if this is ATAPI cmd */ if (ata_is_atapi(qc->tf.protocol)) { desc_info |= ATAPI_CMD; - memset((void *)&cd->acmd, 0, 32); - memcpy((void *)&cd->acmd, qc->cdb, qc->dev->cdb_len); + memset(&cd->cdb, 0, sizeof(cd->cdb)); + memcpy(&cd->cdb, qc->cdb, qc->dev->cdb_len); } if (qc->flags & ATA_QCFLAG_DMAMAP) From 58c541146b6601ad0b12f2a1f8fc925a3a3e0006 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Thu, 2 Dec 2021 14:52:57 +0900 Subject: [PATCH 174/493] ata: libata-sata: use sysfs_emit() Use sysfs_emit() instead of snprintf() in sysfs attibute show() functions. Signed-off-by: Damien Le Moal --- drivers/ata/libata-sata.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/ata/libata-sata.c b/drivers/ata/libata-sata.c index b9c77885b872..eddd33a3cb5f 100644 --- a/drivers/ata/libata-sata.c +++ b/drivers/ata/libata-sata.c @@ -876,7 +876,7 @@ static ssize_t ata_ncq_prio_enable_show(struct device *device, ncq_prio_enable = dev->flags & ATA_DFLAG_NCQ_PRIO_ENABLE; spin_unlock_irq(ap->lock); - return rc ? rc : snprintf(buf, 20, "%u\n", ncq_prio_enable); + return rc ? rc : sysfs_emit(buf, "%u\n", ncq_prio_enable); } static ssize_t ata_ncq_prio_enable_store(struct device *device, @@ -972,7 +972,7 @@ ata_scsi_em_message_type_show(struct device *dev, struct device_attribute *attr, struct Scsi_Host *shost = class_to_shost(dev); struct ata_port *ap = ata_shost_to_port(shost); - return snprintf(buf, 23, "%d\n", ap->em_message_type); + return sysfs_emit(buf, "%d\n", ap->em_message_type); } DEVICE_ATTR(em_message_type, S_IRUGO, ata_scsi_em_message_type_show, NULL); From 0667391e191c00fb80d5a227bef977ecb12b5b70 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Thu, 2 Dec 2021 14:56:47 +0900 Subject: [PATCH 175/493] ata: libata-scsi: use sysfs_emit() Use sysfs_emit() instead of snprintf() in ata_scsi_park_show(). Signed-off-by: Damien Le Moal --- drivers/ata/libata-scsi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c index 313e9475507b..5f27f5c29907 100644 --- a/drivers/ata/libata-scsi.c +++ b/drivers/ata/libata-scsi.c @@ -121,7 +121,7 @@ static ssize_t ata_scsi_park_show(struct device *device, unlock: spin_unlock_irq(ap->lock); - return rc ? rc : snprintf(buf, 20, "%u\n", msecs); + return rc ? rc : sysfs_emit(buf, "%u\n", msecs); } static ssize_t ata_scsi_park_store(struct device *device, From 179a028225c145171a2e95abbc69b579f72cdf5a Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Thu, 2 Dec 2021 15:02:17 +0900 Subject: [PATCH 176/493] ata: ahci: use sysfs_emit() Use sysfs_emit() instead of sprintf in remapped_nvme_show(). Signed-off-by: Damien Le Moal --- drivers/ata/ahci.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/ata/ahci.c b/drivers/ata/ahci.c index 1e1167e725a4..98d04a780458 100644 --- a/drivers/ata/ahci.c +++ b/drivers/ata/ahci.c @@ -1657,7 +1657,7 @@ static ssize_t remapped_nvme_show(struct device *dev, struct ata_host *host = dev_get_drvdata(dev); struct ahci_host_priv *hpriv = host->private_data; - return sprintf(buf, "%u\n", hpriv->remapped_nvme); + return sysfs_emit(buf, "%u\n", hpriv->remapped_nvme); } static DEVICE_ATTR_RO(remapped_nvme); From ab0efc068ebf73dcec6198c05b8a8111f0d48aea Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Thu, 2 Dec 2021 15:05:03 +0900 Subject: [PATCH 177/493] ata: sata_fsl: use sysfs_emit() Use sysfs_emit() instead of sprintf() in fsl_sata_intr_coalescing_show() and fsl_sata_rx_watermark_show(). Signed-off-by: Damien Le Moal --- drivers/ata/sata_fsl.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/ata/sata_fsl.c b/drivers/ata/sata_fsl.c index c5a2c1e9ed6b..ec52511ae60f 100644 --- a/drivers/ata/sata_fsl.c +++ b/drivers/ata/sata_fsl.c @@ -322,7 +322,7 @@ static void fsl_sata_set_irq_coalescing(struct ata_host *host, static ssize_t fsl_sata_intr_coalescing_show(struct device *dev, struct device_attribute *attr, char *buf) { - return sprintf(buf, "%d %d\n", + return sysfs_emit(buf, "%d %d\n", intr_coalescing_count, intr_coalescing_ticks); } @@ -357,9 +357,9 @@ static ssize_t fsl_sata_rx_watermark_show(struct device *dev, spin_lock_irqsave(&host->lock, flags); rx_watermark = ioread32(csr_base + TRANSCFG); rx_watermark &= 0x1f; - spin_unlock_irqrestore(&host->lock, flags); - return sprintf(buf, "%d\n", rx_watermark); + + return sysfs_emit(buf, "%d\n", rx_watermark); } static ssize_t fsl_sata_rx_watermark_store(struct device *dev, From f713961de5057765bc663db72b7e540f27827750 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Thu, 9 Dec 2021 16:35:17 +0200 Subject: [PATCH 178/493] ata: sata_dwc_460ex: Use devm_platform_*ioremap_resource() APIs Use devm_platform_get_and_ioremap_resource() and devm_platform_ioremap_resource() APIs instead of their open coded analogues. Signed-off-by: Andy Shevchenko Signed-off-by: Damien Le Moal --- drivers/ata/sata_dwc_460ex.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/drivers/ata/sata_dwc_460ex.c b/drivers/ata/sata_dwc_460ex.c index 338c2e50f759..513bee589d12 100644 --- a/drivers/ata/sata_dwc_460ex.c +++ b/drivers/ata/sata_dwc_460ex.c @@ -237,7 +237,6 @@ static int sata_dwc_dma_init_old(struct platform_device *pdev, struct sata_dwc_device *hsdev) { struct device_node *np = pdev->dev.of_node; - struct resource *res; hsdev->dma = devm_kzalloc(&pdev->dev, sizeof(*hsdev->dma), GFP_KERNEL); if (!hsdev->dma) @@ -254,8 +253,7 @@ static int sata_dwc_dma_init_old(struct platform_device *pdev, } /* Get physical SATA DMA register base address */ - res = platform_get_resource(pdev, IORESOURCE_MEM, 1); - hsdev->dma->regs = devm_ioremap_resource(&pdev->dev, res); + hsdev->dma->regs = devm_platform_ioremap_resource(pdev, 1); if (IS_ERR(hsdev->dma->regs)) return PTR_ERR(hsdev->dma->regs); @@ -1228,8 +1226,7 @@ static int sata_dwc_probe(struct platform_device *ofdev) host->private_data = hsdev; /* Ioremap SATA registers */ - res = platform_get_resource(ofdev, IORESOURCE_MEM, 0); - base = devm_ioremap_resource(&ofdev->dev, res); + base = devm_platform_get_and_ioremap_resource(ofdev, 0, &res); if (IS_ERR(base)) return PTR_ERR(base); dev_dbg(&ofdev->dev, "ioremap done for SATA register address\n"); From f1550f27f8a92a4d29329aeeb28b743365abceae Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Thu, 9 Dec 2021 16:35:18 +0200 Subject: [PATCH 179/493] ata: sata_dwc_460ex: Use temporary variable for struct device Use temporary variable for struct device to make code neater. Signed-off-by: Andy Shevchenko Signed-off-by: Damien Le Moal --- drivers/ata/sata_dwc_460ex.c | 37 ++++++++++++++++++------------------ 1 file changed, 19 insertions(+), 18 deletions(-) diff --git a/drivers/ata/sata_dwc_460ex.c b/drivers/ata/sata_dwc_460ex.c index 513bee589d12..5421f74c0199 100644 --- a/drivers/ata/sata_dwc_460ex.c +++ b/drivers/ata/sata_dwc_460ex.c @@ -215,9 +215,10 @@ static int sata_dwc_dma_get_channel_old(struct sata_dwc_device_port *hsdevp) { struct sata_dwc_device *hsdev = hsdevp->hsdev; struct dw_dma_slave *dws = &sata_dwc_dma_dws; + struct device *dev = hsdev->dev; dma_cap_mask_t mask; - dws->dma_dev = hsdev->dev; + dws->dma_dev = dev; dma_cap_zero(mask); dma_cap_set(DMA_SLAVE, mask); @@ -225,8 +226,7 @@ static int sata_dwc_dma_get_channel_old(struct sata_dwc_device_port *hsdevp) /* Acquire DMA channel */ hsdevp->chan = dma_request_channel(mask, sata_dwc_dma_filter, hsdevp); if (!hsdevp->chan) { - dev_err(hsdev->dev, "%s: dma channel unavailable\n", - __func__); + dev_err(dev, "%s: dma channel unavailable\n", __func__); return -EAGAIN; } @@ -236,19 +236,20 @@ static int sata_dwc_dma_get_channel_old(struct sata_dwc_device_port *hsdevp) static int sata_dwc_dma_init_old(struct platform_device *pdev, struct sata_dwc_device *hsdev) { - struct device_node *np = pdev->dev.of_node; + struct device *dev = &pdev->dev; + struct device_node *np = dev->of_node; - hsdev->dma = devm_kzalloc(&pdev->dev, sizeof(*hsdev->dma), GFP_KERNEL); + hsdev->dma = devm_kzalloc(dev, sizeof(*hsdev->dma), GFP_KERNEL); if (!hsdev->dma) return -ENOMEM; - hsdev->dma->dev = &pdev->dev; + hsdev->dma->dev = dev; hsdev->dma->id = pdev->id; /* Get SATA DMA interrupt number */ hsdev->dma->irq = irq_of_parse_and_map(np, 1); if (hsdev->dma->irq == NO_IRQ) { - dev_err(&pdev->dev, "no SATA DMA irq\n"); + dev_err(dev, "no SATA DMA irq\n"); return -ENODEV; } @@ -1205,6 +1206,8 @@ static const struct ata_port_info sata_dwc_port_info[] = { static int sata_dwc_probe(struct platform_device *ofdev) { + struct device *dev = &ofdev->dev; + struct device_node *np = dev->of_node; struct sata_dwc_device *hsdev; u32 idr, versionr; char *ver = (char *)&versionr; @@ -1214,12 +1217,11 @@ static int sata_dwc_probe(struct platform_device *ofdev) struct ata_host *host; struct ata_port_info pi = sata_dwc_port_info[0]; const struct ata_port_info *ppi[] = { &pi, NULL }; - struct device_node *np = ofdev->dev.of_node; struct resource *res; /* Allocate DWC SATA device */ - host = ata_host_alloc_pinfo(&ofdev->dev, ppi, SATA_DWC_MAX_PORTS); - hsdev = devm_kzalloc(&ofdev->dev, sizeof(*hsdev), GFP_KERNEL); + host = ata_host_alloc_pinfo(dev, ppi, SATA_DWC_MAX_PORTS); + hsdev = devm_kzalloc(dev, sizeof(*hsdev), GFP_KERNEL); if (!host || !hsdev) return -ENOMEM; @@ -1229,7 +1231,7 @@ static int sata_dwc_probe(struct platform_device *ofdev) base = devm_platform_get_and_ioremap_resource(ofdev, 0, &res); if (IS_ERR(base)) return PTR_ERR(base); - dev_dbg(&ofdev->dev, "ioremap done for SATA register address\n"); + dev_dbg(dev, "ioremap done for SATA register address\n"); /* Synopsys DWC SATA specific Registers */ hsdev->sata_dwc_regs = base + SATA_DWC_REG_OFFSET; @@ -1243,11 +1245,10 @@ static int sata_dwc_probe(struct platform_device *ofdev) /* Read the ID and Version Registers */ idr = sata_dwc_readl(&hsdev->sata_dwc_regs->idr); versionr = sata_dwc_readl(&hsdev->sata_dwc_regs->versionr); - dev_notice(&ofdev->dev, "id %d, controller version %c.%c%c\n", - idr, ver[0], ver[1], ver[2]); + dev_notice(dev, "id %d, controller version %c.%c%c\n", idr, ver[0], ver[1], ver[2]); /* Save dev for later use in dev_xxx() routines */ - hsdev->dev = &ofdev->dev; + hsdev->dev = dev; /* Enable SATA Interrupts */ sata_dwc_enable_interrupts(hsdev); @@ -1255,7 +1256,7 @@ static int sata_dwc_probe(struct platform_device *ofdev) /* Get SATA interrupt number */ irq = irq_of_parse_and_map(np, 0); if (irq == NO_IRQ) { - dev_err(&ofdev->dev, "no SATA DMA irq\n"); + dev_err(dev, "no SATA DMA irq\n"); return -ENODEV; } @@ -1267,7 +1268,7 @@ static int sata_dwc_probe(struct platform_device *ofdev) } #endif - hsdev->phy = devm_phy_optional_get(hsdev->dev, "sata-phy"); + hsdev->phy = devm_phy_optional_get(dev, "sata-phy"); if (IS_ERR(hsdev->phy)) return PTR_ERR(hsdev->phy); @@ -1282,7 +1283,7 @@ static int sata_dwc_probe(struct platform_device *ofdev) */ err = ata_host_activate(host, irq, sata_dwc_isr, 0, &sata_dwc_sht); if (err) - dev_err(&ofdev->dev, "failed to activate host"); + dev_err(dev, "failed to activate host"); return 0; @@ -1306,7 +1307,7 @@ static int sata_dwc_remove(struct platform_device *ofdev) sata_dwc_dma_exit_old(hsdev); #endif - dev_dbg(&ofdev->dev, "done\n"); + dev_dbg(dev, "done\n"); return 0; } From 0805e945651d6cf3a08349fd041c2049dadc76d9 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Thu, 9 Dec 2021 16:35:19 +0200 Subject: [PATCH 180/493] ata: sata_dwc_460ex: Remove unused forward declaration sata_dwc_port_stop() is not used before being defined, remove redundant forward declaration. Signed-off-by: Andy Shevchenko Signed-off-by: Damien Le Moal --- drivers/ata/sata_dwc_460ex.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/ata/sata_dwc_460ex.c b/drivers/ata/sata_dwc_460ex.c index 5421f74c0199..bd4859563796 100644 --- a/drivers/ata/sata_dwc_460ex.c +++ b/drivers/ata/sata_dwc_460ex.c @@ -185,7 +185,6 @@ static void sata_dwc_bmdma_start_by_tag(struct ata_queued_cmd *qc, u8 tag); static int sata_dwc_qc_complete(struct ata_port *ap, struct ata_queued_cmd *qc, u32 check_status); static void sata_dwc_dma_xfer_complete(struct ata_port *ap, u32 check_status); -static void sata_dwc_port_stop(struct ata_port *ap); static void sata_dwc_clear_dmacr(struct sata_dwc_device_port *hsdevp, u8 tag); #ifdef CONFIG_SATA_DWC_OLD_DMA From ea63a8990151948b87dfc0502028bcc7d6724dbc Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Thu, 9 Dec 2021 16:59:37 +0200 Subject: [PATCH 181/493] ata: libahci_platform: Remove bogus 32-bit DMA mask attempt If 64-bit mask attempt fails, the 32-bit will fail by the very same reason. Don't even try the latter. It's a continuation of the changes that contains, e.g. dcc02c19cc06 ("sata_sil24: use dma_set_mask_and_coherent"). Signed-off-by: Andy Shevchenko Reviewed-by: Hans de Goede Signed-off-by: Damien Le Moal --- drivers/ata/libahci_platform.c | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/drivers/ata/libahci_platform.c b/drivers/ata/libahci_platform.c index 0910441321f7..bfa267e6f045 100644 --- a/drivers/ata/libahci_platform.c +++ b/drivers/ata/libahci_platform.c @@ -642,13 +642,8 @@ int ahci_platform_init_host(struct platform_device *pdev, if (hpriv->cap & HOST_CAP_64) { rc = dma_coerce_mask_and_coherent(dev, DMA_BIT_MASK(64)); if (rc) { - rc = dma_coerce_mask_and_coherent(dev, - DMA_BIT_MASK(32)); - if (rc) { - dev_err(dev, "Failed to enable 64-bit DMA.\n"); - return rc; - } - dev_warn(dev, "Enable 32-bit DMA instead of 64-bit.\n"); + dev_err(dev, "Failed to enable 64-bit DMA.\n"); + return rc; } } From 7b6acb4e7faa8590ac9b56874a2129a977da8890 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Fri, 17 Dec 2021 13:28:32 +0200 Subject: [PATCH 182/493] ata: libahci_platform: Get rid of dup message when IRQ can't be retrieved platform_get_irq() will print a message when it fails. No need to repeat this. Signed-off-by: Andy Shevchenko Signed-off-by: Damien Le Moal --- drivers/ata/libahci_platform.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/ata/libahci_platform.c b/drivers/ata/libahci_platform.c index bfa267e6f045..18296443ccba 100644 --- a/drivers/ata/libahci_platform.c +++ b/drivers/ata/libahci_platform.c @@ -579,11 +579,8 @@ int ahci_platform_init_host(struct platform_device *pdev, int i, irq, n_ports, rc; irq = platform_get_irq(pdev, 0); - if (irq < 0) { - if (irq != -EPROBE_DEFER) - dev_err(dev, "no irq\n"); + if (irq < 0) return irq; - } if (!irq) return -EINVAL; From da29947057950232f4ad8e0e118d2d5002daaf2b Mon Sep 17 00:00:00 2001 From: Changcheng Deng Date: Mon, 20 Dec 2021 11:33:58 +0000 Subject: [PATCH 183/493] ata: libata: use min() to make code cleaner Use min() in order to make code cleaner. Reported-by: Zeal Robot Signed-off-by: Changcheng Deng Signed-off-by: Damien Le Moal --- drivers/ata/libata-scsi.c | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c index 5f27f5c29907..b0b7ab46a03c 100644 --- a/drivers/ata/libata-scsi.c +++ b/drivers/ata/libata-scsi.c @@ -3591,10 +3591,7 @@ static int ata_mselect_caching(struct ata_queued_cmd *qc, */ if (len != CACHE_MPAGE_LEN - 2) { - if (len < CACHE_MPAGE_LEN - 2) - *fp = len; - else - *fp = CACHE_MPAGE_LEN - 2; + *fp = min(len, CACHE_MPAGE_LEN - 2); return -EINVAL; } @@ -3647,10 +3644,7 @@ static int ata_mselect_control(struct ata_queued_cmd *qc, */ if (len != CONTROL_MPAGE_LEN - 2) { - if (len < CONTROL_MPAGE_LEN - 2) - *fp = len; - else - *fp = CONTROL_MPAGE_LEN - 2; + *fp = min(len, CONTROL_MPAGE_LEN - 2); return -EINVAL; } From f3b9db5f4fd1f65b44935d22b6fe0016aa62d5c0 Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Tue, 21 Dec 2021 08:20:24 +0100 Subject: [PATCH 184/493] ata: libata: remove pointless debugging messages Debugging messages in pci init functions or sg setup are pretty much pointless, as the workflow pretty much decides what happened. So drop them. Signed-off-by: Hannes Reinecke Signed-off-by: Damien Le Moal --- drivers/ata/acard-ahci.c | 4 ---- drivers/ata/ahci.c | 2 -- drivers/ata/ata_piix.c | 3 --- drivers/ata/libahci.c | 3 --- drivers/ata/libata-core.c | 11 ----------- drivers/ata/libata-sff.c | 2 -- drivers/ata/sata_nv.c | 4 ---- 7 files changed, 29 deletions(-) diff --git a/drivers/ata/acard-ahci.c b/drivers/ata/acard-ahci.c index 2a04e8abd397..536d4cb8f08b 100644 --- a/drivers/ata/acard-ahci.c +++ b/drivers/ata/acard-ahci.c @@ -185,8 +185,6 @@ static unsigned int acard_ahci_fill_sg(struct ata_queued_cmd *qc, void *cmd_tbl) struct acard_sg *acard_sg = cmd_tbl + AHCI_CMD_TBL_HDR_SZ; unsigned int si, last_si = 0; - VPRINTK("ENTER\n"); - /* * Next, the S/G list. */ @@ -362,8 +360,6 @@ static int acard_ahci_init_one(struct pci_dev *pdev, const struct pci_device_id struct ata_host *host; int n_ports, i, rc; - VPRINTK("ENTER\n"); - WARN_ON((int)ATA_MAX_QUEUE > AHCI_MAX_CMDS); ata_print_version_once(&pdev->dev, DRV_VERSION); diff --git a/drivers/ata/ahci.c b/drivers/ata/ahci.c index 98d04a780458..d5bed24a52ba 100644 --- a/drivers/ata/ahci.c +++ b/drivers/ata/ahci.c @@ -1673,8 +1673,6 @@ static int ahci_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) int n_ports, i, rc; int ahci_pci_bar = AHCI_PCI_BAR_STANDARD; - VPRINTK("ENTER\n"); - WARN_ON((int)ATA_MAX_QUEUE > AHCI_MAX_CMDS); ata_print_version_once(&pdev->dev, DRV_VERSION); diff --git a/drivers/ata/ata_piix.c b/drivers/ata/ata_piix.c index 0b2fcf0d1d6c..eb6bf30bd2e3 100644 --- a/drivers/ata/ata_piix.c +++ b/drivers/ata/ata_piix.c @@ -1345,7 +1345,6 @@ static void piix_init_pcs(struct ata_host *host, new_pcs = pcs | map_db->port_enable; if (new_pcs != pcs) { - DPRINTK("updating PCS from 0x%x to 0x%x\n", pcs, new_pcs); pci_write_config_word(pdev, ICH5_PCS, new_pcs); msleep(150); } @@ -1769,14 +1768,12 @@ static int __init piix_init(void) { int rc; - DPRINTK("pci_register_driver\n"); rc = pci_register_driver(&piix_pci_driver); if (rc) return rc; in_module_init = 0; - DPRINTK("done\n"); return 0; } diff --git a/drivers/ata/libahci.c b/drivers/ata/libahci.c index f76b8418e6fb..94edbc89a48c 100644 --- a/drivers/ata/libahci.c +++ b/drivers/ata/libahci.c @@ -1620,8 +1620,6 @@ static unsigned int ahci_fill_sg(struct ata_queued_cmd *qc, void *cmd_tbl) struct ahci_sg *ahci_sg = cmd_tbl + AHCI_CMD_TBL_HDR_SZ; unsigned int si; - VPRINTK("ENTER\n"); - /* * Next, the S/G list. */ @@ -1695,7 +1693,6 @@ static void ahci_fbs_dec_intr(struct ata_port *ap) u32 fbs = readl(port_mmio + PORT_FBS); int retries = 3; - DPRINTK("ENTER\n"); BUG_ON(!pp->fbs_enabled); /* time to wait for DEC is not specified by AHCI spec, diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c index aba0c67d1bd6..72f56c32fe83 100644 --- a/drivers/ata/libata-core.c +++ b/drivers/ata/libata-core.c @@ -764,9 +764,6 @@ int ata_build_rw_tf(struct ata_taskfile *tf, struct ata_device *dev, head = track % dev->heads; sect = (u32)block % dev->sectors + 1; - DPRINTK("block %u track %u cyl %u head %u sect %u\n", - (u32)block, track, cyl, head, sect); - /* Check whether the converted CHS can fit. Cylinder: 0-65535 Head: 0-15 @@ -4569,8 +4566,6 @@ static int ata_sg_setup(struct ata_queued_cmd *qc) struct ata_port *ap = qc->ap; unsigned int n_elem; - VPRINTK("ENTER, ata%u\n", ap->print_id); - n_elem = dma_map_sg(ap->dev, qc->sg, qc->n_elem, qc->dma_dir); if (n_elem < 1) return -1; @@ -5375,8 +5370,6 @@ struct ata_port *ata_port_alloc(struct ata_host *host) { struct ata_port *ap; - DPRINTK("ENTER\n"); - ap = kzalloc(sizeof(*ap), GFP_KERNEL); if (!ap) return NULL; @@ -5493,8 +5486,6 @@ struct ata_host *ata_host_alloc(struct device *dev, int max_ports) int i; void *dr; - DPRINTK("ENTER\n"); - /* alloc a container for our list of ATA ports (buses) */ sz = sizeof(struct ata_host) + (max_ports + 1) * sizeof(void *); host = kzalloc(sz, GFP_KERNEL); @@ -5784,9 +5775,7 @@ int ata_port_probe(struct ata_port *ap) __ata_port_probe(ap); ata_port_wait_eh(ap); } else { - DPRINTK("ata%u: bus probe begin\n", ap->print_id); rc = ata_bus_probe(ap); - DPRINTK("ata%u: bus probe end\n", ap->print_id); } return rc; } diff --git a/drivers/ata/libata-sff.c b/drivers/ata/libata-sff.c index b71ea4a680b0..39c026f3948c 100644 --- a/drivers/ata/libata-sff.c +++ b/drivers/ata/libata-sff.c @@ -2467,8 +2467,6 @@ static int ata_pci_init_one(struct pci_dev *pdev, struct ata_host *host = NULL; int rc; - DPRINTK("ENTER\n"); - pi = ata_sff_find_valid_pi(ppi); if (!pi) { dev_err(&pdev->dev, "no valid port_info specified\n"); diff --git a/drivers/ata/sata_nv.c b/drivers/ata/sata_nv.c index 16272c111208..3c70405a0b80 100644 --- a/drivers/ata/sata_nv.c +++ b/drivers/ata/sata_nv.c @@ -1277,8 +1277,6 @@ static int nv_adma_host_init(struct ata_host *host) unsigned int i; u32 tmp32; - VPRINTK("ENTER\n"); - /* enable ADMA on the ports */ pci_read_config_dword(pdev, NV_MCP_SATA_CFG_20, &tmp32); tmp32 |= NV_MCP_SATA_CFG_20_PORT0_EN | @@ -1320,8 +1318,6 @@ static void nv_adma_fill_sg(struct ata_queued_cmd *qc, struct nv_adma_cpb *cpb) struct scatterlist *sg; unsigned int si; - VPRINTK("ENTER\n"); - for_each_sg(qc->sg, sg, qc->n_elem, si) { aprd = (si < 5) ? &cpb->aprd[si] : &pp->aprd[NV_ADMA_SGTBL_LEN * qc->hw_tag + (si-5)]; From bb6a42d7104644bb8d59fac9a93b69d7790ff15e Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Tue, 21 Dec 2021 08:20:25 +0100 Subject: [PATCH 185/493] ata: libata: whitespace cleanup Signed-off-by: Hannes Reinecke Signed-off-by: Damien Le Moal --- drivers/ata/libata-transport.c | 18 +++++++++--------- drivers/ata/libata.h | 2 +- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/drivers/ata/libata-transport.c b/drivers/ata/libata-transport.c index 34bb4608bdc6..4162d625fc92 100644 --- a/drivers/ata/libata-transport.c +++ b/drivers/ata/libata-transport.c @@ -163,7 +163,7 @@ static struct { { AC_ERR_INVALID, "InvalidArg" }, { AC_ERR_OTHER, "Unknown" }, { AC_ERR_NODEV_HINT, "NoDeviceHint" }, - { AC_ERR_NCQ, "NCQError" } + { AC_ERR_NCQ, "NCQError" } }; ata_bitfield_name_match(err, ata_err_names) @@ -327,7 +327,7 @@ int ata_tport_add(struct device *parent, */ static int noop(int x) { return x; } -#define ata_link_show_linkspeed(field, format) \ +#define ata_link_show_linkspeed(field, format) \ static ssize_t \ show_ata_link_##field(struct device *dev, \ struct device_attribute *attr, char *buf) \ @@ -416,7 +416,7 @@ int ata_tlink_add(struct ata_link *link) dev->release = ata_tlink_release; if (ata_is_host_link(link)) dev_set_name(dev, "link%d", ap->print_id); - else + else dev_set_name(dev, "link%d.%d", ap->print_id, link->pmp); transport_setup_device(dev); @@ -472,7 +472,7 @@ ata_dev_attr(xfer, dma_mode); ata_dev_attr(xfer, xfer_mode); -#define ata_dev_show_simple(field, format_string, cast) \ +#define ata_dev_show_simple(field, format_string, cast) \ static ssize_t \ show_ata_dev_##field(struct device *dev, \ struct device_attribute *attr, char *buf) \ @@ -482,9 +482,9 @@ show_ata_dev_##field(struct device *dev, \ return scnprintf(buf, 20, format_string, cast ata_dev->field); \ } -#define ata_dev_simple_attr(field, format_string, type) \ +#define ata_dev_simple_attr(field, format_string, type) \ ata_dev_show_simple(field, format_string, (type)) \ -static DEVICE_ATTR(field, S_IRUGO, \ + static DEVICE_ATTR(field, S_IRUGO, \ show_ata_dev_##field, NULL) ata_dev_simple_attr(spdn_cnt, "%d\n", int); @@ -502,7 +502,7 @@ static int ata_show_ering(struct ata_ering_entry *ent, void *void_arg) seconds = div_u64_rem(ent->timestamp, HZ, &rem); arg->written += sprintf(arg->buf + arg->written, - "[%5llu.%09lu]", seconds, + "[%5llu.%09lu]", seconds, rem * NSEC_PER_SEC / HZ); arg->written += get_ata_err_names(ent->err_mask, arg->buf + arg->written); @@ -667,7 +667,7 @@ static int ata_tdev_add(struct ata_device *ata_dev) dev->release = ata_tdev_release; if (ata_is_host_link(link)) dev_set_name(dev, "dev%d.%d", ap->print_id,ata_dev->devno); - else + else dev_set_name(dev, "dev%d.%d.0", ap->print_id, link->pmp); transport_setup_device(dev); @@ -689,7 +689,7 @@ static int ata_tdev_add(struct ata_device *ata_dev) */ #define SETUP_TEMPLATE(attrb, field, perm, test) \ - i->private_##attrb[count] = dev_attr_##field; \ + i->private_##attrb[count] = dev_attr_##field; \ i->private_##attrb[count].attr.mode = perm; \ i->attrb[count] = &i->private_##attrb[count]; \ if (test) \ diff --git a/drivers/ata/libata.h b/drivers/ata/libata.h index 68cdd81d747c..4a8f4623cfe5 100644 --- a/drivers/ata/libata.h +++ b/drivers/ata/libata.h @@ -179,7 +179,7 @@ extern int ata_eh_recover(struct ata_port *ap, ata_prereset_fn_t prereset, extern void ata_eh_finish(struct ata_port *ap); extern int ata_ering_map(struct ata_ering *ering, int (*map_fn)(struct ata_ering_entry *, void *), - void *arg); + void *arg); extern unsigned int atapi_eh_tur(struct ata_device *dev, u8 *r_sense_key); extern unsigned int atapi_eh_request_sense(struct ata_device *dev, u8 *sense_buf, u8 dfl_sense_key); From 6c952a0dc9c3ced98c4c8aa7cd11c25c59157f1f Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Tue, 21 Dec 2021 08:20:26 +0100 Subject: [PATCH 186/493] ata: libata: Add ata_port_classify() helper Add an ata_port_classify() helper to print out the results from the device classification and remove the debugging statements from ata_dev_classify(). Signed-off-by: Hannes Reinecke Signed-off-by: Damien Le Moal --- drivers/ata/libahci.c | 2 +- drivers/ata/libata-core.c | 21 +++++---------------- drivers/ata/libata-sff.c | 2 +- drivers/ata/libata-transport.c | 30 ++++++++++++++++++++++++++++++ drivers/ata/sata_fsl.c | 2 +- drivers/ata/sata_inic162x.c | 2 +- drivers/ata/sata_sil24.c | 2 +- include/linux/libata.h | 2 ++ 8 files changed, 42 insertions(+), 21 deletions(-) diff --git a/drivers/ata/libahci.c b/drivers/ata/libahci.c index 94edbc89a48c..b7b460560a92 100644 --- a/drivers/ata/libahci.c +++ b/drivers/ata/libahci.c @@ -1300,7 +1300,7 @@ unsigned int ahci_dev_classify(struct ata_port *ap) tf.lbal = (tmp >> 8) & 0xff; tf.nsect = (tmp) & 0xff; - return ata_dev_classify(&tf); + return ata_port_classify(ap, &tf); } EXPORT_SYMBOL_GPL(ahci_dev_classify); diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c index 72f56c32fe83..28645ac04d9f 100644 --- a/drivers/ata/libata-core.c +++ b/drivers/ata/libata-core.c @@ -1007,32 +1007,21 @@ unsigned int ata_dev_classify(const struct ata_taskfile *tf) * SEMB signature. This is worked around in * ata_dev_read_id(). */ - if ((tf->lbam == 0) && (tf->lbah == 0)) { - DPRINTK("found ATA device by sig\n"); + if (tf->lbam == 0 && tf->lbah == 0) return ATA_DEV_ATA; - } - if ((tf->lbam == 0x14) && (tf->lbah == 0xeb)) { - DPRINTK("found ATAPI device by sig\n"); + if (tf->lbam == 0x14 && tf->lbah == 0xeb) return ATA_DEV_ATAPI; - } - if ((tf->lbam == 0x69) && (tf->lbah == 0x96)) { - DPRINTK("found PMP device by sig\n"); + if (tf->lbam == 0x69 && tf->lbah == 0x96) return ATA_DEV_PMP; - } - if ((tf->lbam == 0x3c) && (tf->lbah == 0xc3)) { - DPRINTK("found SEMB device by sig (could be ATA device)\n"); + if (tf->lbam == 0x3c && tf->lbah == 0xc3) return ATA_DEV_SEMB; - } - if ((tf->lbam == 0xcd) && (tf->lbah == 0xab)) { - DPRINTK("found ZAC device by sig\n"); + if (tf->lbam == 0xcd && tf->lbah == 0xab) return ATA_DEV_ZAC; - } - DPRINTK("unknown device\n"); return ATA_DEV_UNKNOWN; } EXPORT_SYMBOL_GPL(ata_dev_classify); diff --git a/drivers/ata/libata-sff.c b/drivers/ata/libata-sff.c index 39c026f3948c..a119fabe0919 100644 --- a/drivers/ata/libata-sff.c +++ b/drivers/ata/libata-sff.c @@ -1853,7 +1853,7 @@ unsigned int ata_sff_dev_classify(struct ata_device *dev, int present, return ATA_DEV_NONE; /* determine if device is ATA or ATAPI */ - class = ata_dev_classify(&tf); + class = ata_port_classify(ap, &tf); if (class == ATA_DEV_UNKNOWN) { /* If the device failed diagnostic, it's likely to diff --git a/drivers/ata/libata-transport.c b/drivers/ata/libata-transport.c index 4162d625fc92..ca129854a88c 100644 --- a/drivers/ata/libata-transport.c +++ b/drivers/ata/libata-transport.c @@ -321,6 +321,36 @@ int ata_tport_add(struct device *parent, return error; } +/** + * ata_port_classify - determine device type based on ATA-spec signature + * @ap: ATA port device on which the classification should be run + * @tf: ATA taskfile register set for device to be identified + * + * A wrapper around ata_dev_classify() to provide additional logging + * + * RETURNS: + * Device type, %ATA_DEV_ATA, %ATA_DEV_ATAPI, %ATA_DEV_PMP, + * %ATA_DEV_ZAC, or %ATA_DEV_UNKNOWN the event of failure. + */ +unsigned int ata_port_classify(struct ata_port *ap, + const struct ata_taskfile *tf) +{ + int i; + unsigned int class = ata_dev_classify(tf); + + /* Start with index '1' to skip the 'unknown' entry */ + for (i = 1; i < ARRAY_SIZE(ata_class_names); i++) { + if (ata_class_names[i].value == class) { + ata_port_dbg(ap, "found %s device by sig\n", + ata_class_names[i].name); + return class; + } + } + + ata_port_info(ap, "found unknown device (class %u)\n", class); + return class; +} +EXPORT_SYMBOL_GPL(ata_port_classify); /* * ATA link attributes diff --git a/drivers/ata/sata_fsl.c b/drivers/ata/sata_fsl.c index ec52511ae60f..7504d9fbff2a 100644 --- a/drivers/ata/sata_fsl.c +++ b/drivers/ata/sata_fsl.c @@ -814,7 +814,7 @@ static unsigned int sata_fsl_dev_classify(struct ata_port *ap) tf.lbal = (temp >> 8) & 0xff; tf.nsect = temp & 0xff; - return ata_dev_classify(&tf); + return ata_port_classify(ap, &tf); } static int sata_fsl_hardreset(struct ata_link *link, unsigned int *class, diff --git a/drivers/ata/sata_inic162x.c b/drivers/ata/sata_inic162x.c index e517bd8822a5..b6239dae524a 100644 --- a/drivers/ata/sata_inic162x.c +++ b/drivers/ata/sata_inic162x.c @@ -657,7 +657,7 @@ static int inic_hardreset(struct ata_link *link, unsigned int *class, } inic_tf_read(ap, &tf); - *class = ata_dev_classify(&tf); + *class = ata_port_classify(ap, &tf); } return 0; diff --git a/drivers/ata/sata_sil24.c b/drivers/ata/sata_sil24.c index f99ec6f7d7c0..7e9c1945dc81 100644 --- a/drivers/ata/sata_sil24.c +++ b/drivers/ata/sata_sil24.c @@ -680,7 +680,7 @@ static int sil24_softreset(struct ata_link *link, unsigned int *class, } sil24_read_tf(ap, 0, &tf); - *class = ata_dev_classify(&tf); + *class = ata_port_classify(ap, &tf); DPRINTK("EXIT, class=%u\n", *class); return 0; diff --git a/include/linux/libata.h b/include/linux/libata.h index 2a8404b26083..235fdbeb19ea 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -1160,6 +1160,8 @@ extern enum ata_completion_errors ata_noop_qc_prep(struct ata_queued_cmd *qc); extern void ata_sg_init(struct ata_queued_cmd *qc, struct scatterlist *sg, unsigned int n_elem); extern unsigned int ata_dev_classify(const struct ata_taskfile *tf); +extern unsigned int ata_port_classify(struct ata_port *ap, + const struct ata_taskfile *tf); extern void ata_dev_disable(struct ata_device *adev); extern void ata_id_string(const u16 *id, unsigned char *s, unsigned int ofs, unsigned int len); From 6044f3c456dc5f4a013e629da8632fab1d50d08e Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Tue, 21 Dec 2021 08:20:27 +0100 Subject: [PATCH 187/493] ata: libata: move ata_dump_id() to dynamic debugging Use ata_dev_dbg() to print out the information in ata_dump_id() and remove the ata_msg_probe() conditional. Signed-off-by: Hannes Reinecke Signed-off-by: Damien Le Moal --- drivers/ata/libata-core.c | 36 +++++++++--------------------------- 1 file changed, 9 insertions(+), 27 deletions(-) diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c index 28645ac04d9f..d316fbf13309 100644 --- a/drivers/ata/libata-core.c +++ b/drivers/ata/libata-core.c @@ -1341,6 +1341,7 @@ static int ata_hpa_resize(struct ata_device *dev) /** * ata_dump_id - IDENTIFY DEVICE info debugging output + * @dev: device from which the information is fetched * @id: IDENTIFY DEVICE page to dump * * Dump selected 16-bit words from the given IDENTIFY DEVICE @@ -1350,32 +1351,14 @@ static int ata_hpa_resize(struct ata_device *dev) * caller. */ -static inline void ata_dump_id(const u16 *id) +static inline void ata_dump_id(struct ata_device *dev, const u16 *id) { - DPRINTK("49==0x%04x " - "53==0x%04x " - "63==0x%04x " - "64==0x%04x " - "75==0x%04x \n", - id[49], - id[53], - id[63], - id[64], - id[75]); - DPRINTK("80==0x%04x " - "81==0x%04x " - "82==0x%04x " - "83==0x%04x " - "84==0x%04x \n", - id[80], - id[81], - id[82], - id[83], - id[84]); - DPRINTK("88==0x%04x " - "93==0x%04x\n", - id[88], - id[93]); + ata_dev_dbg(dev, + "49==0x%04x 53==0x%04x 63==0x%04x 64==0x%04x 75==0x%04x\n" + "80==0x%04x 81==0x%04x 82==0x%04x 83==0x%04x 84==0x%04x\n" + "88==0x%04x 93==0x%04x\n", + id[49], id[53], id[63], id[64], id[75], id[80], + id[81], id[82], id[83], id[84], id[88], id[93]); } /** @@ -2632,8 +2615,7 @@ int ata_dev_configure(struct ata_device *dev) /* find max transfer mode; for printk only */ xfer_mask = ata_id_xfermask(id); - if (ata_msg_probe(ap)) - ata_dump_id(id); + ata_dump_id(dev, id); /* SCSI only uses 4-char revisions, dump full 8 chars from ATA */ ata_id_c_string(dev->id, fwrevbuf, ATA_ID_FW_REV, From 4baa5745ec21efdce3470945a3ff6831b3e6c071 Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Tue, 21 Dec 2021 08:20:28 +0100 Subject: [PATCH 188/493] ata: libata: sanitize ATA_HORKAGE_DUMP_ID With moving ata_dev_dbg() over to dynamic debugging ATA_HORKAGE_DUMP_ID will now print out the raw IDENTIFY data without a header unless explicitly enable via dyndebug. So move the logging level up to INFO and have the header printed always. Signed-off-by: Hannes Reinecke Signed-off-by: Damien Le Moal --- drivers/ata/libata-core.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c index d316fbf13309..73020a6d125e 100644 --- a/drivers/ata/libata-core.c +++ b/drivers/ata/libata-core.c @@ -1848,10 +1848,10 @@ retry: } if (dev->horkage & ATA_HORKAGE_DUMP_ID) { - ata_dev_dbg(dev, "dumping IDENTIFY data, " + ata_dev_info(dev, "dumping IDENTIFY data, " "class=%d may_fallback=%d tried_spinup=%d\n", class, may_fallback, tried_spinup); - print_hex_dump(KERN_DEBUG, "", DUMP_PREFIX_OFFSET, + print_hex_dump(KERN_INFO, "", DUMP_PREFIX_OFFSET, 16, 2, id, ATA_ID_WORDS * sizeof(*id), true); } From f8ec26d0f5bcdb864f771fb6d250d9ed3165eb61 Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Tue, 21 Dec 2021 08:20:29 +0100 Subject: [PATCH 189/493] ata: libata: add reset tracepoints To follow the flow of control we should be using tracepoints, as they will tie in with the actual I/O flow and deliver a better overview about what it happening. This patch adds tracepoints for hard reset, soft reset, and postreset and adds them in the libata-eh control flow. With that we can drop the reset DPRINTK calls in the various drivers. Signed-off-by: Hannes Reinecke Signed-off-by: Damien Le Moal --- drivers/ata/ahci.c | 7 --- drivers/ata/ahci_qoriq.c | 4 -- drivers/ata/libahci.c | 10 ---- drivers/ata/libata-core.c | 4 -- drivers/ata/libata-eh.c | 21 ++++++-- drivers/ata/libata-sff.c | 11 +--- drivers/ata/pata_octeon_cf.c | 2 - drivers/ata/sata_fsl.c | 10 ---- drivers/ata/sata_rcar.c | 4 -- drivers/ata/sata_sil24.c | 3 -- include/trace/events/libata.h | 96 +++++++++++++++++++++++++++++++++++ 11 files changed, 115 insertions(+), 57 deletions(-) diff --git a/drivers/ata/ahci.c b/drivers/ata/ahci.c index d5bed24a52ba..3939afeca388 100644 --- a/drivers/ata/ahci.c +++ b/drivers/ata/ahci.c @@ -700,8 +700,6 @@ static int ahci_vt8251_hardreset(struct ata_link *link, unsigned int *class, bool online; int rc; - DPRINTK("ENTER\n"); - hpriv->stop_engine(ap); rc = sata_link_hardreset(link, sata_ehc_deb_timing(&link->eh_context), @@ -709,8 +707,6 @@ static int ahci_vt8251_hardreset(struct ata_link *link, unsigned int *class, hpriv->start_engine(ap); - DPRINTK("EXIT, rc=%d, class=%u\n", rc, *class); - /* vt8251 doesn't clear BSY on signature FIS reception, * request follow-up softreset. */ @@ -790,8 +786,6 @@ static int ahci_avn_hardreset(struct ata_link *link, unsigned int *class, bool online; int rc, i; - DPRINTK("ENTER\n"); - hpriv->stop_engine(ap); for (i = 0; i < 2; i++) { @@ -829,7 +823,6 @@ static int ahci_avn_hardreset(struct ata_link *link, unsigned int *class, if (online) *class = ahci_dev_classify(ap); - DPRINTK("EXIT, rc=%d, class=%u\n", rc, *class); return rc; } diff --git a/drivers/ata/ahci_qoriq.c b/drivers/ata/ahci_qoriq.c index 5b46fc9aeb4a..bf5b388bd4e0 100644 --- a/drivers/ata/ahci_qoriq.c +++ b/drivers/ata/ahci_qoriq.c @@ -103,8 +103,6 @@ static int ahci_qoriq_hardreset(struct ata_link *link, unsigned int *class, int rc; bool ls1021a_workaround = (qoriq_priv->type == AHCI_LS1021A); - DPRINTK("ENTER\n"); - hpriv->stop_engine(ap); /* @@ -146,8 +144,6 @@ static int ahci_qoriq_hardreset(struct ata_link *link, unsigned int *class, if (online) *class = ahci_dev_classify(ap); - - DPRINTK("EXIT, rc=%d, class=%u\n", rc, *class); return rc; } diff --git a/drivers/ata/libahci.c b/drivers/ata/libahci.c index b7b460560a92..b6f674a1fddc 100644 --- a/drivers/ata/libahci.c +++ b/drivers/ata/libahci.c @@ -1415,8 +1415,6 @@ int ahci_do_softreset(struct ata_link *link, unsigned int *class, bool fbs_disabled = false; int rc; - DPRINTK("ENTER\n"); - /* prepare for SRST (AHCI-1.1 10.4.1) */ rc = ahci_kick_engine(ap); if (rc && rc != -EOPNOTSUPP) @@ -1476,7 +1474,6 @@ int ahci_do_softreset(struct ata_link *link, unsigned int *class, if (fbs_disabled) ahci_enable_fbs(ap); - DPRINTK("EXIT, class=%u\n", *class); return 0; fail: @@ -1498,8 +1495,6 @@ static int ahci_softreset(struct ata_link *link, unsigned int *class, { int pmp = sata_srst_pmp(link); - DPRINTK("ENTER\n"); - return ahci_do_softreset(link, class, pmp, deadline, ahci_check_ready); } EXPORT_SYMBOL_GPL(ahci_do_softreset); @@ -1529,8 +1524,6 @@ static int ahci_pmp_retry_softreset(struct ata_link *link, unsigned int *class, int rc; u32 irq_sts; - DPRINTK("ENTER\n"); - rc = ahci_do_softreset(link, class, pmp, deadline, ahci_bad_pmp_check_ready); @@ -1564,8 +1557,6 @@ int ahci_do_hardreset(struct ata_link *link, unsigned int *class, struct ata_taskfile tf; int rc; - DPRINTK("ENTER\n"); - hpriv->stop_engine(ap); /* clear D2H reception area to properly wait for D2H FIS */ @@ -1581,7 +1572,6 @@ int ahci_do_hardreset(struct ata_link *link, unsigned int *class, if (*online) *class = ahci_dev_classify(ap); - DPRINTK("EXIT, rc=%d, class=%u\n", rc, *class); return rc; } EXPORT_SYMBOL_GPL(ahci_do_hardreset); diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c index 73020a6d125e..c1946336f81e 100644 --- a/drivers/ata/libata-core.c +++ b/drivers/ata/libata-core.c @@ -3656,16 +3656,12 @@ void ata_std_postreset(struct ata_link *link, unsigned int *classes) { u32 serror; - DPRINTK("ENTER\n"); - /* reset complete, clear SError */ if (!sata_scr_read(link, SCR_ERROR, &serror)) sata_scr_write(link, SCR_ERROR, serror); /* print link status */ sata_print_link_status(link); - - DPRINTK("EXIT\n"); } EXPORT_SYMBOL_GPL(ata_std_postreset); diff --git a/drivers/ata/libata-eh.c b/drivers/ata/libata-eh.c index 1d4a6f1e88cd..043a1c846f2c 100644 --- a/drivers/ata/libata-eh.c +++ b/drivers/ata/libata-eh.c @@ -2596,12 +2596,19 @@ int ata_eh_reset(struct ata_link *link, int classify, /* mark that this EH session started with reset */ ehc->last_reset = jiffies; - if (reset == hardreset) + if (reset == hardreset) { ehc->i.flags |= ATA_EHI_DID_HARDRESET; - else + trace_ata_link_hardreset_begin(link, classes, deadline); + } else { ehc->i.flags |= ATA_EHI_DID_SOFTRESET; + trace_ata_link_softreset_begin(link, classes, deadline); + } rc = ata_do_reset(link, reset, classes, deadline, true); + if (reset == hardreset) + trace_ata_link_hardreset_end(link, classes, rc); + else + trace_ata_link_softreset_end(link, classes, rc); if (rc && rc != -EAGAIN) { failed_link = link; goto fail; @@ -2615,8 +2622,11 @@ int ata_eh_reset(struct ata_link *link, int classify, ata_link_info(slave, "hard resetting link\n"); ata_eh_about_to_do(slave, NULL, ATA_EH_RESET); + trace_ata_slave_hardreset_begin(slave, classes, + deadline); tmp = ata_do_reset(slave, reset, classes, deadline, false); + trace_ata_slave_hardreset_end(slave, classes, tmp); switch (tmp) { case -EAGAIN: rc = -EAGAIN; @@ -2644,7 +2654,9 @@ int ata_eh_reset(struct ata_link *link, int classify, } ata_eh_about_to_do(link, NULL, ATA_EH_RESET); + trace_ata_link_softreset_begin(link, classes, deadline); rc = ata_do_reset(link, reset, classes, deadline, true); + trace_ata_link_softreset_end(link, classes, rc); if (rc) { failed_link = link; goto fail; @@ -2698,8 +2710,11 @@ int ata_eh_reset(struct ata_link *link, int classify, */ if (postreset) { postreset(link, classes); - if (slave) + trace_ata_link_postreset(link, classes, rc); + if (slave) { postreset(slave, classes); + trace_ata_slave_postreset(slave, classes, rc); + } } /* diff --git a/drivers/ata/libata-sff.c b/drivers/ata/libata-sff.c index a119fabe0919..4cc7c0606e06 100644 --- a/drivers/ata/libata-sff.c +++ b/drivers/ata/libata-sff.c @@ -1956,8 +1956,6 @@ static int ata_bus_softreset(struct ata_port *ap, unsigned int devmask, { struct ata_ioports *ioaddr = &ap->ioaddr; - DPRINTK("ata%u: bus reset via SRST\n", ap->print_id); - if (ap->ioaddr.ctl_addr) { /* software reset. causes dev0 to be selected */ iowrite8(ap->ctl, ioaddr->ctl_addr); @@ -1995,8 +1993,6 @@ int ata_sff_softreset(struct ata_link *link, unsigned int *classes, int rc; u8 err; - DPRINTK("ENTER\n"); - /* determine if device 0/1 are present */ if (ata_devchk(ap, 0)) devmask |= (1 << 0); @@ -2007,7 +2003,6 @@ int ata_sff_softreset(struct ata_link *link, unsigned int *classes, ap->ops->sff_dev_select(ap, 0); /* issue bus reset */ - DPRINTK("about to softreset, devmask=%x\n", devmask); rc = ata_bus_softreset(ap, devmask, deadline); /* if link is occupied, -ENODEV too is an error */ if (rc && (rc != -ENODEV || sata_scr_valid(link))) { @@ -2022,7 +2017,6 @@ int ata_sff_softreset(struct ata_link *link, unsigned int *classes, classes[1] = ata_sff_dev_classify(&link->device[1], devmask & (1 << 1), &err); - DPRINTK("EXIT, classes[0]=%u [1]=%u\n", classes[0], classes[1]); return 0; } EXPORT_SYMBOL_GPL(ata_sff_softreset); @@ -2055,7 +2049,6 @@ int sata_sff_hardreset(struct ata_link *link, unsigned int *class, if (online) *class = ata_sff_dev_classify(link->device, 1, NULL); - DPRINTK("EXIT, class=%u\n", *class); return rc; } EXPORT_SYMBOL_GPL(sata_sff_hardreset); @@ -2085,10 +2078,8 @@ void ata_sff_postreset(struct ata_link *link, unsigned int *classes) ap->ops->sff_dev_select(ap, 0); /* bail out if no device is present */ - if (classes[0] == ATA_DEV_NONE && classes[1] == ATA_DEV_NONE) { - DPRINTK("EXIT, no device\n"); + if (classes[0] == ATA_DEV_NONE && classes[1] == ATA_DEV_NONE) return; - } /* set up device control */ if (ap->ops->sff_set_devctl || ap->ioaddr.ctl_addr) { diff --git a/drivers/ata/pata_octeon_cf.c b/drivers/ata/pata_octeon_cf.c index b5a3f710d76d..cdc95eb2b2cb 100644 --- a/drivers/ata/pata_octeon_cf.c +++ b/drivers/ata/pata_octeon_cf.c @@ -440,7 +440,6 @@ static int octeon_cf_softreset16(struct ata_link *link, unsigned int *classes, int rc; u8 err; - DPRINTK("about to softreset\n"); __raw_writew(ap->ctl, base + 0xe); udelay(20); __raw_writew(ap->ctl | ATA_SRST, base + 0xe); @@ -455,7 +454,6 @@ static int octeon_cf_softreset16(struct ata_link *link, unsigned int *classes, /* determine by signature whether we have ATA or ATAPI devices */ classes[0] = ata_sff_dev_classify(&link->device[0], 1, &err); - DPRINTK("EXIT, classes[0]=%u [1]=%u\n", classes[0], classes[1]); return 0; } diff --git a/drivers/ata/sata_fsl.c b/drivers/ata/sata_fsl.c index 7504d9fbff2a..8aace70e8826 100644 --- a/drivers/ata/sata_fsl.c +++ b/drivers/ata/sata_fsl.c @@ -827,8 +827,6 @@ static int sata_fsl_hardreset(struct ata_link *link, unsigned int *class, int i = 0; unsigned long start_jiffies; - DPRINTK("in xx_hardreset\n"); - try_offline_again: /* * Force host controller to go off-line, aborting current operations @@ -943,10 +941,7 @@ static int sata_fsl_softreset(struct ata_link *link, unsigned int *class, u8 *cfis; u32 Serror; - DPRINTK("in xx_softreset\n"); - if (ata_link_offline(link)) { - DPRINTK("PHY reports no device\n"); *class = ATA_DEV_NONE; return 0; } @@ -959,8 +954,6 @@ static int sata_fsl_softreset(struct ata_link *link, unsigned int *class, * reached here, we can send a command to the target device */ - DPRINTK("Sending SRST/device reset\n"); - ata_tf_init(link->device, &tf); cfis = (u8 *) &pp->cmdentry->cfis; @@ -1032,8 +1025,6 @@ static int sata_fsl_softreset(struct ata_link *link, unsigned int *class, */ iowrite32(0x01, CC + hcr_base); /* We know it will be cmd#0 always */ - DPRINTK("SATA FSL : Now checking device signature\n"); - *class = ATA_DEV_NONE; /* Verify if SStatus indicates device presence */ @@ -1047,7 +1038,6 @@ static int sata_fsl_softreset(struct ata_link *link, unsigned int *class, *class = sata_fsl_dev_classify(ap); - DPRINTK("class = %d\n", *class); VPRINTK("ccreg = 0x%x\n", ioread32(hcr_base + CC)); VPRINTK("cereg = 0x%x\n", ioread32(hcr_base + CE)); } diff --git a/drivers/ata/sata_rcar.c b/drivers/ata/sata_rcar.c index 44b0ed8f6bb8..9005833ab02f 100644 --- a/drivers/ata/sata_rcar.c +++ b/drivers/ata/sata_rcar.c @@ -323,8 +323,6 @@ static int sata_rcar_bus_softreset(struct ata_port *ap, unsigned long deadline) { struct ata_ioports *ioaddr = &ap->ioaddr; - DPRINTK("ata%u: bus reset via SRST\n", ap->print_id); - /* software reset. causes dev0 to be selected */ iowrite32(ap->ctl, ioaddr->ctl_addr); udelay(20); @@ -350,7 +348,6 @@ static int sata_rcar_softreset(struct ata_link *link, unsigned int *classes, devmask |= 1 << 0; /* issue bus reset */ - DPRINTK("about to softreset, devmask=%x\n", devmask); rc = sata_rcar_bus_softreset(ap, deadline); /* if link is occupied, -ENODEV too is an error */ if (rc && (rc != -ENODEV || sata_scr_valid(link))) { @@ -361,7 +358,6 @@ static int sata_rcar_softreset(struct ata_link *link, unsigned int *classes, /* determine by signature whether we have ATA or ATAPI devices */ classes[0] = ata_sff_dev_classify(&link->device[0], devmask, &err); - DPRINTK("classes[0]=%u\n", classes[0]); return 0; } diff --git a/drivers/ata/sata_sil24.c b/drivers/ata/sata_sil24.c index 7e9c1945dc81..2fef6ce93f07 100644 --- a/drivers/ata/sata_sil24.c +++ b/drivers/ata/sata_sil24.c @@ -656,8 +656,6 @@ static int sil24_softreset(struct ata_link *link, unsigned int *class, const char *reason; int rc; - DPRINTK("ENTER\n"); - /* put the port into known state */ if (sil24_init_port(ap)) { reason = "port not ready"; @@ -682,7 +680,6 @@ static int sil24_softreset(struct ata_link *link, unsigned int *class, sil24_read_tf(ap, 0, &tf); *class = ata_port_classify(ap, &tf); - DPRINTK("EXIT, class=%u\n", *class); return 0; err: diff --git a/include/trace/events/libata.h b/include/trace/events/libata.h index ab69434e2329..ec2a350d1aca 100644 --- a/include/trace/events/libata.h +++ b/include/trace/events/libata.h @@ -132,6 +132,22 @@ ata_protocol_name(ATAPI_PROT_PIO), \ ata_protocol_name(ATAPI_PROT_DMA)) +#define ata_class_name(class) { class, #class } +#define show_class_name(val) \ + __print_symbolic(val, \ + ata_class_name(ATA_DEV_UNKNOWN), \ + ata_class_name(ATA_DEV_ATA), \ + ata_class_name(ATA_DEV_ATA_UNSUP), \ + ata_class_name(ATA_DEV_ATAPI), \ + ata_class_name(ATA_DEV_ATAPI_UNSUP), \ + ata_class_name(ATA_DEV_PMP), \ + ata_class_name(ATA_DEV_PMP_UNSUP), \ + ata_class_name(ATA_DEV_SEMB), \ + ata_class_name(ATA_DEV_SEMB_UNSUP), \ + ata_class_name(ATA_DEV_ZAC), \ + ata_class_name(ATA_DEV_ZAC_UNSUP), \ + ata_class_name(ATA_DEV_NONE)) + const char *libata_trace_parse_status(struct trace_seq*, unsigned char); #define __parse_status(s) libata_trace_parse_status(p, s) @@ -329,6 +345,86 @@ TRACE_EVENT(ata_eh_link_autopsy_qc, __parse_eh_err_mask(__entry->eh_err_mask)) ); +DECLARE_EVENT_CLASS(ata_link_reset_begin_template, + + TP_PROTO(struct ata_link *link, unsigned int *class, unsigned long deadline), + + TP_ARGS(link, class, deadline), + + TP_STRUCT__entry( + __field( unsigned int, ata_port ) + __array( unsigned int, class, 2 ) + __field( unsigned long, deadline ) + ), + + TP_fast_assign( + __entry->ata_port = link->ap->print_id; + memcpy(__entry->class, class, 2); + __entry->deadline = deadline; + ), + + TP_printk("ata_port=%u deadline=%lu classes=[%s,%s]", + __entry->ata_port, __entry->deadline, + show_class_name(__entry->class[0]), + show_class_name(__entry->class[1])) +); + +DEFINE_EVENT(ata_link_reset_begin_template, ata_link_hardreset_begin, + TP_PROTO(struct ata_link *link, unsigned int *class, unsigned long deadline), + TP_ARGS(link, class, deadline)); + +DEFINE_EVENT(ata_link_reset_begin_template, ata_slave_hardreset_begin, + TP_PROTO(struct ata_link *link, unsigned int *class, unsigned long deadline), + TP_ARGS(link, class, deadline)); + +DEFINE_EVENT(ata_link_reset_begin_template, ata_link_softreset_begin, + TP_PROTO(struct ata_link *link, unsigned int *class, unsigned long deadline), + TP_ARGS(link, class, deadline)); + +DECLARE_EVENT_CLASS(ata_link_reset_end_template, + + TP_PROTO(struct ata_link *link, unsigned int *class, int rc), + + TP_ARGS(link, class, rc), + + TP_STRUCT__entry( + __field( unsigned int, ata_port ) + __array( unsigned int, class, 2 ) + __field( int, rc ) + ), + + TP_fast_assign( + __entry->ata_port = link->ap->print_id; + memcpy(__entry->class, class, 2); + __entry->rc = rc; + ), + + TP_printk("ata_port=%u rc=%d class=[%s,%s]", + __entry->ata_port, __entry->rc, + show_class_name(__entry->class[0]), + show_class_name(__entry->class[1])) +); + +DEFINE_EVENT(ata_link_reset_end_template, ata_link_hardreset_end, + TP_PROTO(struct ata_link *link, unsigned int *class, int rc), + TP_ARGS(link, class, rc)); + +DEFINE_EVENT(ata_link_reset_end_template, ata_slave_hardreset_end, + TP_PROTO(struct ata_link *link, unsigned int *class, int rc), + TP_ARGS(link, class, rc)); + +DEFINE_EVENT(ata_link_reset_end_template, ata_link_softreset_end, + TP_PROTO(struct ata_link *link, unsigned int *class, int rc), + TP_ARGS(link, class, rc)); + +DEFINE_EVENT(ata_link_reset_end_template, ata_link_postreset, + TP_PROTO(struct ata_link *link, unsigned int *class, int rc), + TP_ARGS(link, class, rc)); + +DEFINE_EVENT(ata_link_reset_end_template, ata_slave_postreset, + TP_PROTO(struct ata_link *link, unsigned int *class, int rc), + TP_ARGS(link, class, rc)); + #endif /* _TRACE_LIBATA_H */ /* This part must be outside protection */ From fc914faad67f237528739ec717222a560c97e723 Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Tue, 21 Dec 2021 08:20:30 +0100 Subject: [PATCH 190/493] ata: libata: add qc_prep tracepoint Convert the existing ata_qc_issue() tracepoint into a template, and add tracepoints for ata_qc_prep() and ata_qc_issue() based on that template. Signed-off-by: Hannes Reinecke Signed-off-by: Damien Le Moal --- drivers/ata/libata-core.c | 1 + include/trace/events/libata.h | 10 +++++++++- 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c index c1946336f81e..846596fdd831 100644 --- a/drivers/ata/libata-core.c +++ b/drivers/ata/libata-core.c @@ -4892,6 +4892,7 @@ void ata_qc_issue(struct ata_queued_cmd *qc) return; } + trace_ata_qc_prep(qc); qc->err_mask |= ap->ops->qc_prep(qc); if (unlikely(qc->err_mask)) goto err; diff --git a/include/trace/events/libata.h b/include/trace/events/libata.h index ec2a350d1aca..2394fc2b2831 100644 --- a/include/trace/events/libata.h +++ b/include/trace/events/libata.h @@ -164,7 +164,7 @@ const char *libata_trace_parse_subcmd(struct trace_seq *, unsigned char, unsigned char, unsigned char); #define __parse_subcmd(c,f,h) libata_trace_parse_subcmd(p, c, f, h) -TRACE_EVENT(ata_qc_issue, +DECLARE_EVENT_CLASS(ata_qc_issue_template, TP_PROTO(struct ata_queued_cmd *qc), @@ -223,6 +223,14 @@ TRACE_EVENT(ata_qc_issue, __entry->dev) ); +DEFINE_EVENT(ata_qc_issue_template, ata_qc_prep, + TP_PROTO(struct ata_queued_cmd *qc), + TP_ARGS(qc)); + +DEFINE_EVENT(ata_qc_issue_template, ata_qc_issue, + TP_PROTO(struct ata_queued_cmd *qc), + TP_ARGS(qc)); + DECLARE_EVENT_CLASS(ata_qc_complete_template, TP_PROTO(struct ata_queued_cmd *qc), From 26e9baa849a262f75b781c7292c36f4fdfbbf03b Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Tue, 4 Jan 2022 17:33:30 +0100 Subject: [PATCH 191/493] dmaengine: ioatdma: use default_groups in kobj_type There are currently 2 ways to create a set of sysfs files for a kobj_type, through the default_attrs field, and the default_groups field. Move the ioatdma sysfs code to use default_groups field which has been the preferred way since aa30f47cf666 ("kobject: Add support for default attribute groups to kobj_type") so that we can soon get rid of the obsolete default_attrs field. Cc: Vinod Koul Cc: dmaengine@vger.kernel.org Signed-off-by: Greg Kroah-Hartman Acked-by: Dave Jiang Link: https://lore.kernel.org/r/20220104163330.1338824-1-gregkh@linuxfoundation.org Signed-off-by: Vinod Koul --- drivers/dma/ioat/sysfs.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/dma/ioat/sysfs.c b/drivers/dma/ioat/sysfs.c index aa44bcd6a356..168adf28c5b1 100644 --- a/drivers/dma/ioat/sysfs.c +++ b/drivers/dma/ioat/sysfs.c @@ -158,8 +158,9 @@ static struct attribute *ioat_attrs[] = { &intr_coalesce_attr.attr, NULL, }; +ATTRIBUTE_GROUPS(ioat); struct kobj_type ioat_ktype = { .sysfs_ops = &ioat_sysfs_ops, - .default_attrs = ioat_attrs, + .default_groups = ioat_groups, }; From ec0d64231615e50539d83516b974e7947d45fbce Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Mon, 13 Dec 2021 11:51:23 -0700 Subject: [PATCH 192/493] dmaengine: idxd: embed irq_entry in idxd_wq struct With irq_entry already being associated with the wq in a 1:1 relationship, embed the irq_entry in the idxd_wq struct and remove back pointers for idxe_wq and idxd_device. In the process of this work, clean up the interrupt handle assignment so that there's no decision to be made during submit call on where interrupt handle value comes from. Set the interrupt handle during irq request initialization time. irq_entry 0 is designated as special and is tied to the device itself. Signed-off-by: Dave Jiang Link: https://lore.kernel.org/r/163942148362.2412839.12055447853311267866.stgit@djiang5-desk3.ch.intel.com Signed-off-by: Vinod Koul --- drivers/dma/idxd/device.c | 18 +++--- drivers/dma/idxd/idxd.h | 22 +++++-- drivers/dma/idxd/init.c | 119 +++++++++++++++----------------------- drivers/dma/idxd/irq.c | 10 ++-- drivers/dma/idxd/submit.c | 8 +-- drivers/dma/idxd/sysfs.c | 1 - 6 files changed, 79 insertions(+), 99 deletions(-) diff --git a/drivers/dma/idxd/device.c b/drivers/dma/idxd/device.c index 5a50ee6f6881..8233a29f859d 100644 --- a/drivers/dma/idxd/device.c +++ b/drivers/dma/idxd/device.c @@ -21,8 +21,11 @@ static void idxd_wq_disable_cleanup(struct idxd_wq *wq); /* Interrupt control bits */ void idxd_mask_msix_vector(struct idxd_device *idxd, int vec_id) { - struct irq_data *data = irq_get_irq_data(idxd->irq_entries[vec_id].vector); + struct idxd_irq_entry *ie; + struct irq_data *data; + ie = idxd_get_ie(idxd, vec_id); + data = irq_get_irq_data(ie->vector); pci_msi_mask_irq(data); } @@ -38,8 +41,11 @@ void idxd_mask_msix_vectors(struct idxd_device *idxd) void idxd_unmask_msix_vector(struct idxd_device *idxd, int vec_id) { - struct irq_data *data = irq_get_irq_data(idxd->irq_entries[vec_id].vector); + struct idxd_irq_entry *ie; + struct irq_data *data; + ie = idxd_get_ie(idxd, vec_id); + data = irq_get_irq_data(ie->vector); pci_msi_unmask_irq(data); } @@ -1216,13 +1222,6 @@ int __drv_enable_wq(struct idxd_wq *wq) goto err; } - /* - * Device has 1 misc interrupt and N interrupts for descriptor completion. To - * assign WQ to interrupt, we will take the N+1 interrupt since vector 0 is - * for the misc interrupt. - */ - wq->ie = &idxd->irq_entries[wq->id + 1]; - rc = idxd_wq_enable(wq); if (rc < 0) { dev_dbg(dev, "wq %d enabling failed: %d\n", wq->id, rc); @@ -1273,7 +1272,6 @@ void __drv_disable_wq(struct idxd_wq *wq) idxd_wq_drain(wq); idxd_wq_reset(wq); - wq->ie = NULL; wq->client_count = 0; } diff --git a/drivers/dma/idxd/idxd.h b/drivers/dma/idxd/idxd.h index 6b9bfdc557fe..d77be03dd8b0 100644 --- a/drivers/dma/idxd/idxd.h +++ b/drivers/dma/idxd/idxd.h @@ -70,7 +70,6 @@ extern struct idxd_device_driver idxd_user_drv; #define INVALID_INT_HANDLE -1 struct idxd_irq_entry { - struct idxd_device *idxd; int id; int vector; struct llist_head pending_llist; @@ -81,7 +80,6 @@ struct idxd_irq_entry { */ spinlock_t list_lock; int int_handle; - struct idxd_wq *wq; ioasid_t pasid; }; @@ -185,7 +183,7 @@ struct idxd_wq { struct wait_queue_head err_queue; struct idxd_device *idxd; int id; - struct idxd_irq_entry *ie; + struct idxd_irq_entry ie; enum idxd_wq_type type; struct idxd_group *group; int client_count; @@ -266,6 +264,7 @@ struct idxd_device { int id; int major; u32 cmd_status; + struct idxd_irq_entry ie; /* misc irq, msix 0 */ struct pci_dev *pdev; void __iomem *reg_base; @@ -302,8 +301,6 @@ struct idxd_device { union sw_err_reg sw_err; wait_queue_head_t cmd_waitq; - int num_wq_irqs; - struct idxd_irq_entry *irq_entries; struct idxd_dma_dev *idxd_dma; struct workqueue_struct *wq; @@ -395,6 +392,21 @@ static inline void idxd_dev_set_type(struct idxd_dev *idev, int type) idev->type = type; } +static inline struct idxd_irq_entry *idxd_get_ie(struct idxd_device *idxd, int idx) +{ + return (idx == 0) ? &idxd->ie : &idxd->wqs[idx - 1]->ie; +} + +static inline struct idxd_wq *ie_to_wq(struct idxd_irq_entry *ie) +{ + return container_of(ie, struct idxd_wq, ie); +} + +static inline struct idxd_device *ie_to_idxd(struct idxd_irq_entry *ie) +{ + return container_of(ie, struct idxd_device, ie); +} + extern struct bus_type dsa_bus_type; extern bool support_enqcmd; diff --git a/drivers/dma/idxd/init.c b/drivers/dma/idxd/init.c index 8b3afce9ea67..29c732a94027 100644 --- a/drivers/dma/idxd/init.c +++ b/drivers/dma/idxd/init.c @@ -72,7 +72,7 @@ static int idxd_setup_interrupts(struct idxd_device *idxd) { struct pci_dev *pdev = idxd->pdev; struct device *dev = &pdev->dev; - struct idxd_irq_entry *irq_entry; + struct idxd_irq_entry *ie; int i, msixcnt; int rc = 0; @@ -90,72 +90,54 @@ static int idxd_setup_interrupts(struct idxd_device *idxd) } dev_dbg(dev, "Enabled %d msix vectors\n", msixcnt); - /* - * We implement 1 completion list per MSI-X entry except for - * entry 0, which is for errors and others. - */ - idxd->irq_entries = kcalloc_node(msixcnt, sizeof(struct idxd_irq_entry), - GFP_KERNEL, dev_to_node(dev)); - if (!idxd->irq_entries) { - rc = -ENOMEM; - goto err_irq_entries; - } - - for (i = 0; i < msixcnt; i++) { - idxd->irq_entries[i].id = i; - idxd->irq_entries[i].idxd = idxd; - /* - * Association of WQ should be assigned starting with irq_entry 1. - * irq_entry 0 is for misc interrupts and has no wq association - */ - if (i > 0) - idxd->irq_entries[i].wq = idxd->wqs[i - 1]; - idxd->irq_entries[i].vector = pci_irq_vector(pdev, i); - idxd->irq_entries[i].int_handle = INVALID_INT_HANDLE; - if (device_pasid_enabled(idxd) && i > 0) - idxd->irq_entries[i].pasid = idxd->pasid; - else - idxd->irq_entries[i].pasid = INVALID_IOASID; - spin_lock_init(&idxd->irq_entries[i].list_lock); - } - idxd_msix_perm_setup(idxd); - irq_entry = &idxd->irq_entries[0]; - rc = request_threaded_irq(irq_entry->vector, NULL, idxd_misc_thread, - 0, "idxd-misc", irq_entry); + ie = idxd_get_ie(idxd, 0); + ie->vector = pci_irq_vector(pdev, 0); + rc = request_threaded_irq(ie->vector, NULL, idxd_misc_thread, 0, "idxd-misc", ie); if (rc < 0) { dev_err(dev, "Failed to allocate misc interrupt.\n"); goto err_misc_irq; } - dev_dbg(dev, "Allocated idxd-misc handler on msix vector %d\n", irq_entry->vector); + dev_dbg(dev, "Allocated idxd-misc handler on msix vector %d\n", ie->vector); - /* first MSI-X entry is not for wq interrupts */ - idxd->num_wq_irqs = msixcnt - 1; + for (i = 0; i < idxd->max_wqs; i++) { + int msix_idx = i + 1; - for (i = 1; i < msixcnt; i++) { - irq_entry = &idxd->irq_entries[i]; + ie = idxd_get_ie(idxd, msix_idx); - init_llist_head(&idxd->irq_entries[i].pending_llist); - INIT_LIST_HEAD(&idxd->irq_entries[i].work_list); - rc = request_threaded_irq(irq_entry->vector, NULL, - idxd_wq_thread, 0, "idxd-portal", irq_entry); + /* MSIX vector 0 special, wq irq entry starts at 1 */ + ie->id = msix_idx; + ie->vector = pci_irq_vector(pdev, msix_idx); + ie->int_handle = INVALID_INT_HANDLE; + if (device_pasid_enabled(idxd) && i > 0) + ie->pasid = idxd->pasid; + else + ie->pasid = INVALID_IOASID; + spin_lock_init(&ie->list_lock); + init_llist_head(&ie->pending_llist); + INIT_LIST_HEAD(&ie->work_list); + + rc = request_threaded_irq(ie->vector, NULL, idxd_wq_thread, 0, "idxd-portal", ie); if (rc < 0) { - dev_err(dev, "Failed to allocate irq %d.\n", irq_entry->vector); + dev_err(dev, "Failed to allocate irq %d.\n", ie->vector); goto err_wq_irqs; } - dev_dbg(dev, "Allocated idxd-msix %d for vector %d\n", i, irq_entry->vector); + dev_dbg(dev, "Allocated idxd-msix %d for vector %d\n", i, ie->vector); if (idxd->request_int_handles) { - rc = idxd_device_request_int_handle(idxd, i, &irq_entry->int_handle, + rc = idxd_device_request_int_handle(idxd, i, &ie->int_handle, IDXD_IRQ_MSIX); if (rc < 0) { - free_irq(irq_entry->vector, irq_entry); + free_irq(ie->vector, ie); goto err_wq_irqs; } - dev_dbg(dev, "int handle requested: %u\n", irq_entry->int_handle); + dev_dbg(dev, "int handle requested: %u\n", ie->int_handle); + } else { + ie->int_handle = msix_idx; } + } idxd_unmask_error_interrupts(idxd); @@ -163,23 +145,19 @@ static int idxd_setup_interrupts(struct idxd_device *idxd) err_wq_irqs: while (--i >= 0) { - irq_entry = &idxd->irq_entries[i]; - free_irq(irq_entry->vector, irq_entry); - if (irq_entry->int_handle != INVALID_INT_HANDLE) { - idxd_device_release_int_handle(idxd, irq_entry->int_handle, - IDXD_IRQ_MSIX); - irq_entry->int_handle = INVALID_INT_HANDLE; - irq_entry->pasid = INVALID_IOASID; + ie = &idxd->wqs[i]->ie; + free_irq(ie->vector, ie); + if (ie->int_handle != INVALID_INT_HANDLE) { + idxd_device_release_int_handle(idxd, ie->int_handle, IDXD_IRQ_MSIX); + ie->int_handle = INVALID_INT_HANDLE; + ie->pasid = INVALID_IOASID; } - irq_entry->vector = -1; - irq_entry->wq = NULL; - irq_entry->idxd = NULL; + ie->vector = -1; } err_misc_irq: /* Disable error interrupt generation */ idxd_mask_error_interrupts(idxd); idxd_msix_perm_clear(idxd); - err_irq_entries: pci_free_irq_vectors(pdev); dev_err(dev, "No usable interrupts\n"); return rc; @@ -188,21 +166,18 @@ static int idxd_setup_interrupts(struct idxd_device *idxd) static void idxd_cleanup_interrupts(struct idxd_device *idxd) { struct pci_dev *pdev = idxd->pdev; - struct idxd_irq_entry *irq_entry; + struct idxd_irq_entry *ie; int i; for (i = 0; i < idxd->irq_cnt; i++) { - irq_entry = &idxd->irq_entries[i]; - if (irq_entry->int_handle != INVALID_INT_HANDLE) { - idxd_device_release_int_handle(idxd, irq_entry->int_handle, - IDXD_IRQ_MSIX); - irq_entry->int_handle = INVALID_INT_HANDLE; - irq_entry->pasid = INVALID_IOASID; + ie = idxd_get_ie(idxd, i); + if (ie->int_handle != INVALID_INT_HANDLE) { + idxd_device_release_int_handle(idxd, ie->int_handle, IDXD_IRQ_MSIX); + ie->int_handle = INVALID_INT_HANDLE; + ie->pasid = INVALID_IOASID; } - irq_entry->vector = -1; - irq_entry->wq = NULL; - irq_entry->idxd = NULL; - free_irq(irq_entry->vector, irq_entry); + free_irq(ie->vector, ie); + ie->vector = -1; } idxd_mask_error_interrupts(idxd); @@ -755,7 +730,7 @@ static void idxd_release_int_handles(struct idxd_device *idxd) int i, rc; for (i = 1; i < idxd->irq_cnt; i++) { - struct idxd_irq_entry *ie = &idxd->irq_entries[i]; + struct idxd_irq_entry *ie = idxd_get_ie(idxd, i); if (ie->int_handle != INVALID_INT_HANDLE) { rc = idxd_device_release_int_handle(idxd, ie->int_handle, IDXD_IRQ_MSIX); @@ -783,7 +758,7 @@ static void idxd_shutdown(struct pci_dev *pdev) idxd_mask_error_interrupts(idxd); for (i = 0; i < msixcnt; i++) { - irq_entry = &idxd->irq_entries[i]; + irq_entry = idxd_get_ie(idxd, i); synchronize_irq(irq_entry->vector); if (i == 0) continue; @@ -815,7 +790,7 @@ static void idxd_remove(struct pci_dev *pdev) idxd_disable_system_pasid(idxd); for (i = 0; i < msixcnt; i++) { - irq_entry = &idxd->irq_entries[i]; + irq_entry = idxd_get_ie(idxd, i); free_irq(irq_entry->vector, irq_entry); } idxd_msix_perm_clear(idxd); diff --git a/drivers/dma/idxd/irq.c b/drivers/dma/idxd/irq.c index 925171e9738c..a1316f341dd6 100644 --- a/drivers/dma/idxd/irq.c +++ b/drivers/dma/idxd/irq.c @@ -73,8 +73,8 @@ static void idxd_device_reinit(struct work_struct *work) */ static void idxd_int_handle_revoke_drain(struct idxd_irq_entry *ie) { - struct idxd_wq *wq = ie->wq; - struct idxd_device *idxd = ie->idxd; + struct idxd_wq *wq = ie_to_wq(ie); + struct idxd_device *idxd = wq->idxd; struct device *dev = &idxd->pdev->dev; struct dsa_hw_desc desc = {}; void __iomem *portal; @@ -155,8 +155,8 @@ static void idxd_int_handle_revoke(struct work_struct *work) * at the end to make sure all invalid int handle descriptors are processed. */ for (i = 1; i < idxd->irq_cnt; i++) { - struct idxd_irq_entry *ie = &idxd->irq_entries[i]; - struct idxd_wq *wq = ie->wq; + struct idxd_irq_entry *ie = idxd_get_ie(idxd, i); + struct idxd_wq *wq = ie_to_wq(ie); rc = idxd_device_request_int_handle(idxd, i, &new_handle, IDXD_IRQ_MSIX); if (rc < 0) { @@ -338,7 +338,7 @@ halt: irqreturn_t idxd_misc_thread(int vec, void *data) { struct idxd_irq_entry *irq_entry = data; - struct idxd_device *idxd = irq_entry->idxd; + struct idxd_device *idxd = ie_to_idxd(irq_entry); int rc; u32 cause; diff --git a/drivers/dma/idxd/submit.c b/drivers/dma/idxd/submit.c index 11ac06be1f0a..e289fd48711a 100644 --- a/drivers/dma/idxd/submit.c +++ b/drivers/dma/idxd/submit.c @@ -193,12 +193,8 @@ int idxd_submit_desc(struct idxd_wq *wq, struct idxd_desc *desc) * that we designated the descriptor to. */ if (desc_flags & IDXD_OP_FLAG_RCI) { - ie = wq->ie; - if (ie->int_handle == INVALID_INT_HANDLE) - desc->hw->int_handle = ie->id; - else - desc->hw->int_handle = ie->int_handle; - + ie = &wq->ie; + desc->hw->int_handle = ie->int_handle; llist_add(&desc->llnode, &ie->pending_llist); } diff --git a/drivers/dma/idxd/sysfs.c b/drivers/dma/idxd/sysfs.c index c0fec88ff6c1..13404532131b 100644 --- a/drivers/dma/idxd/sysfs.c +++ b/drivers/dma/idxd/sysfs.c @@ -1304,7 +1304,6 @@ static void idxd_conf_device_release(struct device *dev) kfree(idxd->groups); kfree(idxd->wqs); kfree(idxd->engines); - kfree(idxd->irq_entries); ida_free(&idxd_ida, idxd->id); kfree(idxd); } From 23a50c8035655c5a1d9b52c878b3ebf7b6b83eea Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Mon, 13 Dec 2021 11:51:29 -0700 Subject: [PATCH 193/493] dmaengine: idxd: fix descriptor flushing locking The descriptor flushing for shutdown is not holding the irq_entry list lock. If there's ongoing interrupt completion handling, this can corrupt the list. Add locking to protect list walking. Also refactor the code so it's more compact. Fixes: 8f47d1a5e545 ("dmaengine: idxd: connect idxd to dmaengine subsystem") Signed-off-by: Dave Jiang Link: https://lore.kernel.org/r/163942148935.2412839.18282664745572777280.stgit@djiang5-desk3.ch.intel.com Signed-off-by: Vinod Koul --- drivers/dma/idxd/init.c | 29 +++++++++++++++-------------- 1 file changed, 15 insertions(+), 14 deletions(-) diff --git a/drivers/dma/idxd/init.c b/drivers/dma/idxd/init.c index 29c732a94027..03c735727f68 100644 --- a/drivers/dma/idxd/init.c +++ b/drivers/dma/idxd/init.c @@ -689,26 +689,28 @@ static int idxd_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) return rc; } -static void idxd_flush_pending_llist(struct idxd_irq_entry *ie) +static void idxd_flush_pending_descs(struct idxd_irq_entry *ie) { struct idxd_desc *desc, *itr; struct llist_node *head; + LIST_HEAD(flist); + enum idxd_complete_type ctype; + spin_lock(&ie->list_lock); head = llist_del_all(&ie->pending_llist); - if (!head) - return; + if (head) { + llist_for_each_entry_safe(desc, itr, head, llnode) + list_add_tail(&desc->list, &ie->work_list); + } - llist_for_each_entry_safe(desc, itr, head, llnode) - idxd_dma_complete_txd(desc, IDXD_COMPLETE_ABORT, true); -} + list_for_each_entry_safe(desc, itr, &ie->work_list, list) + list_move_tail(&desc->list, &flist); + spin_unlock(&ie->list_lock); -static void idxd_flush_work_list(struct idxd_irq_entry *ie) -{ - struct idxd_desc *desc, *iter; - - list_for_each_entry_safe(desc, iter, &ie->work_list, list) { + list_for_each_entry_safe(desc, itr, &flist, list) { list_del(&desc->list); - idxd_dma_complete_txd(desc, IDXD_COMPLETE_ABORT, true); + ctype = desc->completion->status ? IDXD_COMPLETE_NORMAL : IDXD_COMPLETE_ABORT; + idxd_dma_complete_txd(desc, ctype, true); } } @@ -762,8 +764,7 @@ static void idxd_shutdown(struct pci_dev *pdev) synchronize_irq(irq_entry->vector); if (i == 0) continue; - idxd_flush_pending_llist(irq_entry); - idxd_flush_work_list(irq_entry); + idxd_flush_pending_descs(irq_entry); } flush_workqueue(idxd->wq); } From 403a2e236538c6b479ea5bfc8b75a75540cfba6b Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Mon, 13 Dec 2021 11:51:34 -0700 Subject: [PATCH 194/493] dmaengine: idxd: change MSIX allocation based on per wq activation Change the driver where WQ interrupt is requested only when wq is being enabled. This new scheme set things up so that request_threaded_irq() is only called when a kernel wq type is being enabled. This also sets up for future interrupt request where different interrupt handler such as wq occupancy interrupt can be setup instead of the wq completion interrupt. Not calling request_irq() until the WQ actually needs an irq also prevents wasting of CPU irq vectors on x86 systems, which is a limited resource. idxd_flush_pending_descs() is moved to device.c since descriptor flushing is now part of wq disable rather than shutdown(). Signed-off-by: Dave Jiang Link: https://lore.kernel.org/r/163942149487.2412839.6691222855803875848.stgit@djiang5-desk3.ch.intel.com Signed-off-by: Vinod Koul --- drivers/dma/idxd/device.c | 161 +++++++++++++++++++++++--------------- drivers/dma/idxd/dma.c | 12 +++ drivers/dma/idxd/idxd.h | 7 +- drivers/dma/idxd/init.c | 133 ++++--------------------------- drivers/dma/idxd/irq.c | 3 + include/uapi/linux/idxd.h | 1 + 6 files changed, 132 insertions(+), 185 deletions(-) diff --git a/drivers/dma/idxd/device.c b/drivers/dma/idxd/device.c index 8233a29f859d..280b41417f41 100644 --- a/drivers/dma/idxd/device.c +++ b/drivers/dma/idxd/device.c @@ -19,36 +19,6 @@ static void idxd_device_wqs_clear_state(struct idxd_device *idxd); static void idxd_wq_disable_cleanup(struct idxd_wq *wq); /* Interrupt control bits */ -void idxd_mask_msix_vector(struct idxd_device *idxd, int vec_id) -{ - struct idxd_irq_entry *ie; - struct irq_data *data; - - ie = idxd_get_ie(idxd, vec_id); - data = irq_get_irq_data(ie->vector); - pci_msi_mask_irq(data); -} - -void idxd_mask_msix_vectors(struct idxd_device *idxd) -{ - struct pci_dev *pdev = idxd->pdev; - int msixcnt = pci_msix_vec_count(pdev); - int i; - - for (i = 0; i < msixcnt; i++) - idxd_mask_msix_vector(idxd, i); -} - -void idxd_unmask_msix_vector(struct idxd_device *idxd, int vec_id) -{ - struct idxd_irq_entry *ie; - struct irq_data *data; - - ie = idxd_get_ie(idxd, vec_id); - data = irq_get_irq_data(ie->vector); - pci_msi_unmask_irq(data); -} - void idxd_unmask_error_interrupts(struct idxd_device *idxd) { union genctrl_reg genctrl; @@ -593,7 +563,6 @@ void idxd_device_reset(struct idxd_device *idxd) idxd_device_clear_state(idxd); idxd->state = IDXD_DEV_DISABLED; idxd_unmask_error_interrupts(idxd); - idxd_msix_perm_setup(idxd); spin_unlock(&idxd->dev_lock); } @@ -732,36 +701,6 @@ void idxd_device_clear_state(struct idxd_device *idxd) idxd_device_wqs_clear_state(idxd); } -void idxd_msix_perm_setup(struct idxd_device *idxd) -{ - union msix_perm mperm; - int i, msixcnt; - - msixcnt = pci_msix_vec_count(idxd->pdev); - if (msixcnt < 0) - return; - - mperm.bits = 0; - mperm.pasid = idxd->pasid; - mperm.pasid_en = device_pasid_enabled(idxd); - for (i = 1; i < msixcnt; i++) - iowrite32(mperm.bits, idxd->reg_base + idxd->msix_perm_offset + i * 8); -} - -void idxd_msix_perm_clear(struct idxd_device *idxd) -{ - union msix_perm mperm; - int i, msixcnt; - - msixcnt = pci_msix_vec_count(idxd->pdev); - if (msixcnt < 0) - return; - - mperm.bits = 0; - for (i = 1; i < msixcnt; i++) - iowrite32(mperm.bits, idxd->reg_base + idxd->msix_perm_offset + i * 8); -} - static void idxd_group_config_write(struct idxd_group *group) { struct idxd_device *idxd = group->idxd; @@ -1158,6 +1097,106 @@ int idxd_device_load_config(struct idxd_device *idxd) return 0; } +static void idxd_flush_pending_descs(struct idxd_irq_entry *ie) +{ + struct idxd_desc *desc, *itr; + struct llist_node *head; + LIST_HEAD(flist); + enum idxd_complete_type ctype; + + spin_lock(&ie->list_lock); + head = llist_del_all(&ie->pending_llist); + if (head) { + llist_for_each_entry_safe(desc, itr, head, llnode) + list_add_tail(&desc->list, &ie->work_list); + } + + list_for_each_entry_safe(desc, itr, &ie->work_list, list) + list_move_tail(&desc->list, &flist); + spin_unlock(&ie->list_lock); + + list_for_each_entry_safe(desc, itr, &flist, list) { + list_del(&desc->list); + ctype = desc->completion->status ? IDXD_COMPLETE_NORMAL : IDXD_COMPLETE_ABORT; + idxd_dma_complete_txd(desc, ctype, true); + } +} + +static void idxd_device_set_perm_entry(struct idxd_device *idxd, + struct idxd_irq_entry *ie) +{ + union msix_perm mperm; + + if (ie->pasid == INVALID_IOASID) + return; + + mperm.bits = 0; + mperm.pasid = ie->pasid; + mperm.pasid_en = 1; + iowrite32(mperm.bits, idxd->reg_base + idxd->msix_perm_offset + ie->id * 8); +} + +static void idxd_device_clear_perm_entry(struct idxd_device *idxd, + struct idxd_irq_entry *ie) +{ + iowrite32(0, idxd->reg_base + idxd->msix_perm_offset + ie->id * 8); +} + +void idxd_wq_free_irq(struct idxd_wq *wq) +{ + struct idxd_device *idxd = wq->idxd; + struct idxd_irq_entry *ie = &wq->ie; + + synchronize_irq(ie->vector); + free_irq(ie->vector, ie); + idxd_flush_pending_descs(ie); + if (idxd->request_int_handles) + idxd_device_release_int_handle(idxd, ie->int_handle, IDXD_IRQ_MSIX); + idxd_device_clear_perm_entry(idxd, ie); + ie->vector = -1; + ie->int_handle = INVALID_INT_HANDLE; + ie->pasid = INVALID_IOASID; +} + +int idxd_wq_request_irq(struct idxd_wq *wq) +{ + struct idxd_device *idxd = wq->idxd; + struct pci_dev *pdev = idxd->pdev; + struct device *dev = &pdev->dev; + struct idxd_irq_entry *ie; + int rc; + + ie = &wq->ie; + ie->vector = pci_irq_vector(pdev, ie->id); + ie->pasid = device_pasid_enabled(idxd) ? idxd->pasid : INVALID_IOASID; + idxd_device_set_perm_entry(idxd, ie); + + rc = request_threaded_irq(ie->vector, NULL, idxd_wq_thread, 0, "idxd-portal", ie); + if (rc < 0) { + dev_err(dev, "Failed to request irq %d.\n", ie->vector); + goto err_irq; + } + + if (idxd->request_int_handles) { + rc = idxd_device_request_int_handle(idxd, ie->id, &ie->int_handle, + IDXD_IRQ_MSIX); + if (rc < 0) + goto err_int_handle; + } else { + ie->int_handle = ie->id; + } + + return 0; + +err_int_handle: + ie->int_handle = INVALID_INT_HANDLE; + free_irq(ie->vector, ie); +err_irq: + idxd_device_clear_perm_entry(idxd, ie); + ie->pasid = INVALID_IOASID; + return rc; +} + int __drv_enable_wq(struct idxd_wq *wq) { struct idxd_device *idxd = wq->idxd; diff --git a/drivers/dma/idxd/dma.c b/drivers/dma/idxd/dma.c index 2ce873994e33..bfff59617d04 100644 --- a/drivers/dma/idxd/dma.c +++ b/drivers/dma/idxd/dma.c @@ -289,6 +289,14 @@ static int idxd_dmaengine_drv_probe(struct idxd_dev *idxd_dev) mutex_lock(&wq->wq_lock); wq->type = IDXD_WQT_KERNEL; + + rc = idxd_wq_request_irq(wq); + if (rc < 0) { + idxd->cmd_status = IDXD_SCMD_WQ_IRQ_ERR; + dev_dbg(dev, "WQ %d irq setup failed: %d\n", wq->id, rc); + goto err_irq; + } + rc = __drv_enable_wq(wq); if (rc < 0) { dev_dbg(dev, "Enable wq %d failed: %d\n", wq->id, rc); @@ -329,6 +337,8 @@ err_ref: err_res_alloc: __drv_disable_wq(wq); err: + idxd_wq_free_irq(wq); +err_irq: wq->type = IDXD_WQT_NONE; mutex_unlock(&wq->wq_lock); return rc; @@ -344,6 +354,8 @@ static void idxd_dmaengine_drv_remove(struct idxd_dev *idxd_dev) idxd_wq_free_resources(wq); __drv_disable_wq(wq); percpu_ref_exit(&wq->wq_active); + idxd_wq_free_irq(wq); + wq->type = IDXD_WQT_NONE; mutex_unlock(&wq->wq_lock); } diff --git a/drivers/dma/idxd/idxd.h b/drivers/dma/idxd/idxd.h index d77be03dd8b0..6353e762286d 100644 --- a/drivers/dma/idxd/idxd.h +++ b/drivers/dma/idxd/idxd.h @@ -548,15 +548,10 @@ void idxd_wqs_quiesce(struct idxd_device *idxd); bool idxd_queue_int_handle_resubmit(struct idxd_desc *desc); /* device interrupt control */ -void idxd_msix_perm_setup(struct idxd_device *idxd); -void idxd_msix_perm_clear(struct idxd_device *idxd); irqreturn_t idxd_misc_thread(int vec, void *data); irqreturn_t idxd_wq_thread(int irq, void *data); void idxd_mask_error_interrupts(struct idxd_device *idxd); void idxd_unmask_error_interrupts(struct idxd_device *idxd); -void idxd_mask_msix_vectors(struct idxd_device *idxd); -void idxd_mask_msix_vector(struct idxd_device *idxd, int vec_id); -void idxd_unmask_msix_vector(struct idxd_device *idxd, int vec_id); /* device control */ int idxd_register_idxd_drv(void); @@ -595,6 +590,8 @@ int idxd_wq_disable_pasid(struct idxd_wq *wq); void __idxd_wq_quiesce(struct idxd_wq *wq); void idxd_wq_quiesce(struct idxd_wq *wq); int idxd_wq_init_percpu_ref(struct idxd_wq *wq); +void idxd_wq_free_irq(struct idxd_wq *wq); +int idxd_wq_request_irq(struct idxd_wq *wq); /* submission */ int idxd_submit_desc(struct idxd_wq *wq, struct idxd_desc *desc); diff --git a/drivers/dma/idxd/init.c b/drivers/dma/idxd/init.c index 03c735727f68..3505efb7ae71 100644 --- a/drivers/dma/idxd/init.c +++ b/drivers/dma/idxd/init.c @@ -90,7 +90,6 @@ static int idxd_setup_interrupts(struct idxd_device *idxd) } dev_dbg(dev, "Enabled %d msix vectors\n", msixcnt); - idxd_msix_perm_setup(idxd); ie = idxd_get_ie(idxd, 0); ie->vector = pci_irq_vector(pdev, 0); @@ -99,65 +98,26 @@ static int idxd_setup_interrupts(struct idxd_device *idxd) dev_err(dev, "Failed to allocate misc interrupt.\n"); goto err_misc_irq; } - - dev_dbg(dev, "Allocated idxd-misc handler on msix vector %d\n", ie->vector); + dev_dbg(dev, "Requested idxd-misc handler on msix vector %d\n", ie->vector); for (i = 0; i < idxd->max_wqs; i++) { int msix_idx = i + 1; ie = idxd_get_ie(idxd, msix_idx); - - /* MSIX vector 0 special, wq irq entry starts at 1 */ ie->id = msix_idx; - ie->vector = pci_irq_vector(pdev, msix_idx); ie->int_handle = INVALID_INT_HANDLE; - if (device_pasid_enabled(idxd) && i > 0) - ie->pasid = idxd->pasid; - else - ie->pasid = INVALID_IOASID; + ie->pasid = INVALID_IOASID; + spin_lock_init(&ie->list_lock); init_llist_head(&ie->pending_llist); INIT_LIST_HEAD(&ie->work_list); - - rc = request_threaded_irq(ie->vector, NULL, idxd_wq_thread, 0, "idxd-portal", ie); - if (rc < 0) { - dev_err(dev, "Failed to allocate irq %d.\n", ie->vector); - goto err_wq_irqs; - } - - dev_dbg(dev, "Allocated idxd-msix %d for vector %d\n", i, ie->vector); - if (idxd->request_int_handles) { - rc = idxd_device_request_int_handle(idxd, i, &ie->int_handle, - IDXD_IRQ_MSIX); - if (rc < 0) { - free_irq(ie->vector, ie); - goto err_wq_irqs; - } - dev_dbg(dev, "int handle requested: %u\n", ie->int_handle); - } else { - ie->int_handle = msix_idx; - } - } idxd_unmask_error_interrupts(idxd); return 0; - err_wq_irqs: - while (--i >= 0) { - ie = &idxd->wqs[i]->ie; - free_irq(ie->vector, ie); - if (ie->int_handle != INVALID_INT_HANDLE) { - idxd_device_release_int_handle(idxd, ie->int_handle, IDXD_IRQ_MSIX); - ie->int_handle = INVALID_INT_HANDLE; - ie->pasid = INVALID_IOASID; - } - ie->vector = -1; - } err_misc_irq: - /* Disable error interrupt generation */ idxd_mask_error_interrupts(idxd); - idxd_msix_perm_clear(idxd); pci_free_irq_vectors(pdev); dev_err(dev, "No usable interrupts\n"); return rc; @@ -167,20 +127,15 @@ static void idxd_cleanup_interrupts(struct idxd_device *idxd) { struct pci_dev *pdev = idxd->pdev; struct idxd_irq_entry *ie; - int i; + int msixcnt; - for (i = 0; i < idxd->irq_cnt; i++) { - ie = idxd_get_ie(idxd, i); - if (ie->int_handle != INVALID_INT_HANDLE) { - idxd_device_release_int_handle(idxd, ie->int_handle, IDXD_IRQ_MSIX); - ie->int_handle = INVALID_INT_HANDLE; - ie->pasid = INVALID_IOASID; - } - free_irq(ie->vector, ie); - ie->vector = -1; - } + msixcnt = pci_msix_vec_count(pdev); + if (msixcnt <= 0) + return; + ie = idxd_get_ie(idxd, 0); idxd_mask_error_interrupts(idxd); + free_irq(ie->vector, ie); pci_free_irq_vectors(pdev); } @@ -592,8 +547,6 @@ static int idxd_probe(struct idxd_device *idxd) if (rc) goto err_config; - dev_dbg(dev, "IDXD interrupt setup complete.\n"); - idxd->major = idxd_cdev_get_major(idxd); rc = perfmon_pmu_init(idxd); @@ -689,31 +642,6 @@ static int idxd_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) return rc; } -static void idxd_flush_pending_descs(struct idxd_irq_entry *ie) -{ - struct idxd_desc *desc, *itr; - struct llist_node *head; - LIST_HEAD(flist); - enum idxd_complete_type ctype; - - spin_lock(&ie->list_lock); - head = llist_del_all(&ie->pending_llist); - if (head) { - llist_for_each_entry_safe(desc, itr, head, llnode) - list_add_tail(&desc->list, &ie->work_list); - } - - list_for_each_entry_safe(desc, itr, &ie->work_list, list) - list_move_tail(&desc->list, &flist); - spin_unlock(&ie->list_lock); - - list_for_each_entry_safe(desc, itr, &flist, list) { - list_del(&desc->list); - ctype = desc->completion->status ? IDXD_COMPLETE_NORMAL : IDXD_COMPLETE_ABORT; - idxd_dma_complete_txd(desc, ctype, true); - } -} - void idxd_wqs_quiesce(struct idxd_device *idxd) { struct idxd_wq *wq; @@ -726,46 +654,19 @@ void idxd_wqs_quiesce(struct idxd_device *idxd) } } -static void idxd_release_int_handles(struct idxd_device *idxd) -{ - struct device *dev = &idxd->pdev->dev; - int i, rc; - - for (i = 1; i < idxd->irq_cnt; i++) { - struct idxd_irq_entry *ie = idxd_get_ie(idxd, i); - - if (ie->int_handle != INVALID_INT_HANDLE) { - rc = idxd_device_release_int_handle(idxd, ie->int_handle, IDXD_IRQ_MSIX); - if (rc < 0) - dev_warn(dev, "irq handle %d release failed\n", ie->int_handle); - else - dev_dbg(dev, "int handle released: %u\n", ie->int_handle); - } - } -} - static void idxd_shutdown(struct pci_dev *pdev) { struct idxd_device *idxd = pci_get_drvdata(pdev); - int rc, i; struct idxd_irq_entry *irq_entry; - int msixcnt = pci_msix_vec_count(pdev); + int rc; rc = idxd_device_disable(idxd); if (rc) dev_err(&pdev->dev, "Disabling device failed\n"); - dev_dbg(&pdev->dev, "%s called\n", __func__); - idxd_mask_msix_vectors(idxd); + irq_entry = &idxd->ie; + synchronize_irq(irq_entry->vector); idxd_mask_error_interrupts(idxd); - - for (i = 0; i < msixcnt; i++) { - irq_entry = idxd_get_ie(idxd, i); - synchronize_irq(irq_entry->vector); - if (i == 0) - continue; - idxd_flush_pending_descs(irq_entry); - } flush_workqueue(idxd->wq); } @@ -773,8 +674,6 @@ static void idxd_remove(struct pci_dev *pdev) { struct idxd_device *idxd = pci_get_drvdata(pdev); struct idxd_irq_entry *irq_entry; - int msixcnt = pci_msix_vec_count(pdev); - int i; idxd_unregister_devices(idxd); /* @@ -790,12 +689,8 @@ static void idxd_remove(struct pci_dev *pdev) if (device_pasid_enabled(idxd)) idxd_disable_system_pasid(idxd); - for (i = 0; i < msixcnt; i++) { - irq_entry = idxd_get_ie(idxd, i); - free_irq(irq_entry->vector, irq_entry); - } - idxd_msix_perm_clear(idxd); - idxd_release_int_handles(idxd); + irq_entry = idxd_get_ie(idxd, 0); + free_irq(irq_entry->vector, irq_entry); pci_free_irq_vectors(pdev); pci_iounmap(pdev, idxd->reg_base); iommu_dev_disable_feature(&pdev->dev, IOMMU_DEV_FEAT_SVA); diff --git a/drivers/dma/idxd/irq.c b/drivers/dma/idxd/irq.c index a1316f341dd6..743ead5ebc57 100644 --- a/drivers/dma/idxd/irq.c +++ b/drivers/dma/idxd/irq.c @@ -158,6 +158,9 @@ static void idxd_int_handle_revoke(struct work_struct *work) struct idxd_irq_entry *ie = idxd_get_ie(idxd, i); struct idxd_wq *wq = ie_to_wq(ie); + if (ie->int_handle == INVALID_INT_HANDLE) + continue; + rc = idxd_device_request_int_handle(idxd, i, &new_handle, IDXD_IRQ_MSIX); if (rc < 0) { dev_warn(dev, "get int handle %d failed: %d\n", i, rc); diff --git a/include/uapi/linux/idxd.h b/include/uapi/linux/idxd.h index c750eac09fc9..a8f0ff75c430 100644 --- a/include/uapi/linux/idxd.h +++ b/include/uapi/linux/idxd.h @@ -28,6 +28,7 @@ enum idxd_scmd_stat { IDXD_SCMD_WQ_NONE_CONFIGURED = 0x800d0000, IDXD_SCMD_WQ_NO_SIZE = 0x800e0000, IDXD_SCMD_WQ_NO_PRIV = 0x800f0000, + IDXD_SCMD_WQ_IRQ_ERR = 0x80100000, }; #define IDXD_SCMD_SOFTERR_MASK 0x80000000 From 0f225705cf6536826318180831e18a74595efc8d Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Tue, 14 Dec 2021 13:15:17 -0700 Subject: [PATCH 195/493] dmaengine: idxd: fix wq settings post wq disable By the spec, wq size and group association is not changeable unless device is disabled. Exclude clearing the shadow copy on wq disable/reset. This allows wq type to be changed after disable to be re-enabled. Move the size and group association to its own cleanup and only call it during device disable. Fixes: 0dcfe41e9a4c ("dmanegine: idxd: cleanup all device related bits after disabling device") Reported-by: Lucas Van Tested-by: Lucas Van Signed-off-by: Dave Jiang Link: https://lore.kernel.org/r/163951291732.2987775.13576571320501115257.stgit@djiang5-desk3.ch.intel.com Signed-off-by: Vinod Koul --- drivers/dma/idxd/device.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/drivers/dma/idxd/device.c b/drivers/dma/idxd/device.c index 280b41417f41..3fce7629daa7 100644 --- a/drivers/dma/idxd/device.c +++ b/drivers/dma/idxd/device.c @@ -358,8 +358,6 @@ static void idxd_wq_disable_cleanup(struct idxd_wq *wq) lockdep_assert_held(&wq->wq_lock); memset(wq->wqcfg, 0, idxd->wqcfg_size); wq->type = IDXD_WQT_NONE; - wq->size = 0; - wq->group = NULL; wq->threshold = 0; wq->priority = 0; wq->ats_dis = 0; @@ -371,6 +369,15 @@ static void idxd_wq_disable_cleanup(struct idxd_wq *wq) wq->max_batch_size = WQ_DEFAULT_MAX_BATCH; } +static void idxd_wq_device_reset_cleanup(struct idxd_wq *wq) +{ + lockdep_assert_held(&wq->wq_lock); + + idxd_wq_disable_cleanup(wq); + wq->size = 0; + wq->group = NULL; +} + static void idxd_wq_ref_release(struct percpu_ref *ref) { struct idxd_wq *wq = container_of(ref, struct idxd_wq, wq_active); @@ -689,6 +696,7 @@ static void idxd_device_wqs_clear_state(struct idxd_device *idxd) if (wq->state == IDXD_WQ_ENABLED) { idxd_wq_disable_cleanup(wq); + idxd_wq_device_reset_cleanup(wq); wq->state = IDXD_WQ_DISABLED; } } From 7ed6f1b85fb613e5e44ef3e14d73f2dc96860935 Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Tue, 14 Dec 2021 13:23:09 -0700 Subject: [PATCH 196/493] dmaengine: idxd: change bandwidth token to read buffers DSA spec v1.2 has changed the term of "bandwidth tokens" to "read buffers" in order to make the concept clearer. Deprecate bandwidth token naming in the driver and convert to read buffers in order to match with the spec and reduce confusion when reading the spec. Signed-off-by: Dave Jiang Link: https://lore.kernel.org/r/163951338932.2988321.6162640806935567317.stgit@djiang5-desk3.ch.intel.com Signed-off-by: Vinod Koul --- drivers/dma/idxd/device.c | 25 +++++++++++---------- drivers/dma/idxd/idxd.h | 12 +++++------ drivers/dma/idxd/init.c | 6 +++--- drivers/dma/idxd/registers.h | 14 ++++++------ drivers/dma/idxd/sysfs.c | 42 ++++++++++++++++++------------------ 5 files changed, 49 insertions(+), 50 deletions(-) diff --git a/drivers/dma/idxd/device.c b/drivers/dma/idxd/device.c index 3fce7629daa7..573ad8b86804 100644 --- a/drivers/dma/idxd/device.c +++ b/drivers/dma/idxd/device.c @@ -678,9 +678,9 @@ static void idxd_groups_clear_state(struct idxd_device *idxd) memset(&group->grpcfg, 0, sizeof(group->grpcfg)); group->num_engines = 0; group->num_wqs = 0; - group->use_token_limit = false; - group->tokens_allowed = 0; - group->tokens_reserved = 0; + group->use_rdbuf_limit = false; + group->rdbufs_allowed = 0; + group->rdbufs_reserved = 0; group->tc_a = -1; group->tc_b = -1; } @@ -748,10 +748,10 @@ static int idxd_groups_config_write(struct idxd_device *idxd) int i; struct device *dev = &idxd->pdev->dev; - /* Setup bandwidth token limit */ - if (idxd->hw.gen_cap.config_en && idxd->token_limit) { + /* Setup bandwidth rdbuf limit */ + if (idxd->hw.gen_cap.config_en && idxd->rdbuf_limit) { reg.bits = ioread32(idxd->reg_base + IDXD_GENCFG_OFFSET); - reg.token_limit = idxd->token_limit; + reg.rdbuf_limit = idxd->rdbuf_limit; iowrite32(reg.bits, idxd->reg_base + IDXD_GENCFG_OFFSET); } @@ -889,13 +889,12 @@ static void idxd_group_flags_setup(struct idxd_device *idxd) group->tc_b = group->grpcfg.flags.tc_b = 1; else group->grpcfg.flags.tc_b = group->tc_b; - group->grpcfg.flags.use_token_limit = group->use_token_limit; - group->grpcfg.flags.tokens_reserved = group->tokens_reserved; - if (group->tokens_allowed) - group->grpcfg.flags.tokens_allowed = - group->tokens_allowed; + group->grpcfg.flags.use_rdbuf_limit = group->use_rdbuf_limit; + group->grpcfg.flags.rdbufs_reserved = group->rdbufs_reserved; + if (group->rdbufs_allowed) + group->grpcfg.flags.rdbufs_allowed = group->rdbufs_allowed; else - group->grpcfg.flags.tokens_allowed = idxd->max_tokens; + group->grpcfg.flags.rdbufs_allowed = idxd->max_rdbufs; } } @@ -1086,7 +1085,7 @@ int idxd_device_load_config(struct idxd_device *idxd) int i, rc; reg.bits = ioread32(idxd->reg_base + IDXD_GENCFG_OFFSET); - idxd->token_limit = reg.token_limit; + idxd->rdbuf_limit = reg.rdbuf_limit; for (i = 0; i < idxd->max_groups; i++) { struct idxd_group *group = idxd->groups[i]; diff --git a/drivers/dma/idxd/idxd.h b/drivers/dma/idxd/idxd.h index 6353e762286d..da72eb15f610 100644 --- a/drivers/dma/idxd/idxd.h +++ b/drivers/dma/idxd/idxd.h @@ -90,9 +90,9 @@ struct idxd_group { int id; int num_engines; int num_wqs; - bool use_token_limit; - u8 tokens_allowed; - u8 tokens_reserved; + bool use_rdbuf_limit; + u8 rdbufs_allowed; + u8 rdbufs_reserved; int tc_a; int tc_b; }; @@ -292,11 +292,11 @@ struct idxd_device { u32 max_batch_size; int max_groups; int max_engines; - int max_tokens; + int max_rdbufs; int max_wqs; int max_wq_size; - int token_limit; - int nr_tokens; /* non-reserved tokens */ + int rdbuf_limit; + int nr_rdbufs; /* non-reserved read buffers */ unsigned int wqcfg_size; union sw_err_reg sw_err; diff --git a/drivers/dma/idxd/init.c b/drivers/dma/idxd/init.c index 3505efb7ae71..08a5f4310188 100644 --- a/drivers/dma/idxd/init.c +++ b/drivers/dma/idxd/init.c @@ -400,9 +400,9 @@ static void idxd_read_caps(struct idxd_device *idxd) dev_dbg(dev, "group_cap: %#llx\n", idxd->hw.group_cap.bits); idxd->max_groups = idxd->hw.group_cap.num_groups; dev_dbg(dev, "max groups: %u\n", idxd->max_groups); - idxd->max_tokens = idxd->hw.group_cap.total_tokens; - dev_dbg(dev, "max tokens: %u\n", idxd->max_tokens); - idxd->nr_tokens = idxd->max_tokens; + idxd->max_rdbufs = idxd->hw.group_cap.total_rdbufs; + dev_dbg(dev, "max read buffers: %u\n", idxd->max_rdbufs); + idxd->nr_rdbufs = idxd->max_rdbufs; /* read engine capabilities */ idxd->hw.engine_cap.bits = diff --git a/drivers/dma/idxd/registers.h b/drivers/dma/idxd/registers.h index 8e396698c22b..aa642aecdc0b 100644 --- a/drivers/dma/idxd/registers.h +++ b/drivers/dma/idxd/registers.h @@ -64,9 +64,9 @@ union wq_cap_reg { union group_cap_reg { struct { u64 num_groups:8; - u64 total_tokens:8; - u64 token_en:1; - u64 token_limit:1; + u64 total_rdbufs:8; /* formerly total_tokens */ + u64 rdbuf_ctrl:1; /* formerly token_en */ + u64 rdbuf_limit:1; /* formerly token_limit */ u64 rsvd:46; }; u64 bits; @@ -110,7 +110,7 @@ union offsets_reg { #define IDXD_GENCFG_OFFSET 0x80 union gencfg_reg { struct { - u32 token_limit:8; + u32 rdbuf_limit:8; u32 rsvd:4; u32 user_int_en:1; u32 rsvd2:19; @@ -288,10 +288,10 @@ union group_flags { u32 tc_a:3; u32 tc_b:3; u32 rsvd:1; - u32 use_token_limit:1; - u32 tokens_reserved:8; + u32 use_rdbuf_limit:1; + u32 rdbufs_reserved:8; u32 rsvd2:4; - u32 tokens_allowed:8; + u32 rdbufs_allowed:8; u32 rsvd3:4; }; u32 bits; diff --git a/drivers/dma/idxd/sysfs.c b/drivers/dma/idxd/sysfs.c index 13404532131b..6f1ebf08878a 100644 --- a/drivers/dma/idxd/sysfs.c +++ b/drivers/dma/idxd/sysfs.c @@ -99,17 +99,17 @@ struct device_type idxd_engine_device_type = { /* Group attributes */ -static void idxd_set_free_tokens(struct idxd_device *idxd) +static void idxd_set_free_rdbufs(struct idxd_device *idxd) { - int i, tokens; + int i, rdbufs; - for (i = 0, tokens = 0; i < idxd->max_groups; i++) { + for (i = 0, rdbufs = 0; i < idxd->max_groups; i++) { struct idxd_group *g = idxd->groups[i]; - tokens += g->tokens_reserved; + rdbufs += g->rdbufs_reserved; } - idxd->nr_tokens = idxd->max_tokens - tokens; + idxd->nr_rdbufs = idxd->max_rdbufs - rdbufs; } static ssize_t group_tokens_reserved_show(struct device *dev, @@ -118,7 +118,7 @@ static ssize_t group_tokens_reserved_show(struct device *dev, { struct idxd_group *group = confdev_to_group(dev); - return sysfs_emit(buf, "%u\n", group->tokens_reserved); + return sysfs_emit(buf, "%u\n", group->rdbufs_reserved); } static ssize_t group_tokens_reserved_store(struct device *dev, @@ -143,14 +143,14 @@ static ssize_t group_tokens_reserved_store(struct device *dev, if (idxd->state == IDXD_DEV_ENABLED) return -EPERM; - if (val > idxd->max_tokens) + if (val > idxd->max_rdbufs) return -EINVAL; - if (val > idxd->nr_tokens + group->tokens_reserved) + if (val > idxd->nr_rdbufs + group->rdbufs_reserved) return -EINVAL; - group->tokens_reserved = val; - idxd_set_free_tokens(idxd); + group->rdbufs_reserved = val; + idxd_set_free_rdbufs(idxd); return count; } @@ -164,7 +164,7 @@ static ssize_t group_tokens_allowed_show(struct device *dev, { struct idxd_group *group = confdev_to_group(dev); - return sysfs_emit(buf, "%u\n", group->tokens_allowed); + return sysfs_emit(buf, "%u\n", group->rdbufs_allowed); } static ssize_t group_tokens_allowed_store(struct device *dev, @@ -190,10 +190,10 @@ static ssize_t group_tokens_allowed_store(struct device *dev, return -EPERM; if (val < 4 * group->num_engines || - val > group->tokens_reserved + idxd->nr_tokens) + val > group->rdbufs_reserved + idxd->nr_rdbufs) return -EINVAL; - group->tokens_allowed = val; + group->rdbufs_allowed = val; return count; } @@ -207,7 +207,7 @@ static ssize_t group_use_token_limit_show(struct device *dev, { struct idxd_group *group = confdev_to_group(dev); - return sysfs_emit(buf, "%u\n", group->use_token_limit); + return sysfs_emit(buf, "%u\n", group->use_rdbuf_limit); } static ssize_t group_use_token_limit_store(struct device *dev, @@ -232,10 +232,10 @@ static ssize_t group_use_token_limit_store(struct device *dev, if (idxd->state == IDXD_DEV_ENABLED) return -EPERM; - if (idxd->token_limit == 0) + if (idxd->rdbuf_limit == 0) return -EPERM; - group->use_token_limit = !!val; + group->use_rdbuf_limit = !!val; return count; } @@ -1197,7 +1197,7 @@ static ssize_t max_tokens_show(struct device *dev, { struct idxd_device *idxd = confdev_to_idxd(dev); - return sysfs_emit(buf, "%u\n", idxd->max_tokens); + return sysfs_emit(buf, "%u\n", idxd->max_rdbufs); } static DEVICE_ATTR_RO(max_tokens); @@ -1206,7 +1206,7 @@ static ssize_t token_limit_show(struct device *dev, { struct idxd_device *idxd = confdev_to_idxd(dev); - return sysfs_emit(buf, "%u\n", idxd->token_limit); + return sysfs_emit(buf, "%u\n", idxd->rdbuf_limit); } static ssize_t token_limit_store(struct device *dev, @@ -1227,13 +1227,13 @@ static ssize_t token_limit_store(struct device *dev, if (!test_bit(IDXD_FLAG_CONFIGURABLE, &idxd->flags)) return -EPERM; - if (!idxd->hw.group_cap.token_limit) + if (!idxd->hw.group_cap.rdbuf_limit) return -EPERM; - if (val > idxd->hw.group_cap.total_tokens) + if (val > idxd->hw.group_cap.total_rdbufs) return -EINVAL; - idxd->token_limit = val; + idxd->rdbuf_limit = val; return count; } static DEVICE_ATTR_RW(token_limit); From fde212e44f45e491f8e3875084b587c0c2189078 Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Tue, 14 Dec 2021 13:23:14 -0700 Subject: [PATCH 197/493] dmaengine: idxd: deprecate token sysfs attributes for read buffers The following sysfs attributes will be obsolete due to the name change of tokens to read buffers: max_tokens token_limit group/tokens_allowed group/tokens_reserved group/use_token_limit Create new entries and have old entry print warning of deprecation. New attributes to replace the token ones: max_read_buffers read_buffer_limit group/read_buffers_allowed group/read_buffers_reserved group/use_read_buffer_limit Signed-off-by: Dave Jiang Link: https://lore.kernel.org/r/163951339488.2988321.2424012059911316373.stgit@djiang5-desk3.ch.intel.com Signed-off-by: Vinod Koul --- .../ABI/stable/sysfs-driver-dma-idxd | 47 ++++-- drivers/dma/idxd/sysfs.c | 145 ++++++++++++++---- 2 files changed, 153 insertions(+), 39 deletions(-) diff --git a/Documentation/ABI/stable/sysfs-driver-dma-idxd b/Documentation/ABI/stable/sysfs-driver-dma-idxd index 4d3a23eb05b9..0c2b613f2373 100644 --- a/Documentation/ABI/stable/sysfs-driver-dma-idxd +++ b/Documentation/ABI/stable/sysfs-driver-dma-idxd @@ -41,14 +41,14 @@ KernelVersion: 5.6.0 Contact: dmaengine@vger.kernel.org Description: The maximum number of groups can be created under this device. -What: /sys/bus/dsa/devices/dsa/max_tokens -Date: Oct 25, 2019 -KernelVersion: 5.6.0 +What: /sys/bus/dsa/devices/dsa/max_read_buffers +Date: Dec 10, 2021 +KernelVersion: 5.17.0 Contact: dmaengine@vger.kernel.org -Description: The total number of bandwidth tokens supported by this device. - The bandwidth tokens represent resources within the DSA +Description: The total number of read buffers supported by this device. + The read buffers represent resources within the DSA implementation, and these resources are allocated by engines to - support operations. + support operations. See DSA spec v1.2 9.2.4 Total Read Buffers. What: /sys/bus/dsa/devices/dsa/max_transfer_size Date: Oct 25, 2019 @@ -115,13 +115,13 @@ KernelVersion: 5.6.0 Contact: dmaengine@vger.kernel.org Description: To indicate if this device is configurable or not. -What: /sys/bus/dsa/devices/dsa/token_limit -Date: Oct 25, 2019 -KernelVersion: 5.6.0 +What: /sys/bus/dsa/devices/dsa/read_buffer_limit +Date: Dec 10, 2021 +KernelVersion: 5.17.0 Contact: dmaengine@vger.kernel.org -Description: The maximum number of bandwidth tokens that may be in use at +Description: The maximum number of read buffers that may be in use at one time by operations that access low bandwidth memory in the - device. + device. See DSA spec v1.2 9.2.8 GENCFG on Global Read Buffer Limit. What: /sys/bus/dsa/devices/dsa/cmd_status Date: Aug 28, 2020 @@ -224,7 +224,7 @@ What: /sys/bus/dsa/devices/wq./enqcmds_retries Date Oct 29, 2021 KernelVersion: 5.17.0 Contact: dmaengine@vger.kernel.org -Description: Indicate the number of retires for an enqcmds submission on a shared wq. +Description: Indicate the number of retires for an enqcmds submission on a sharedwq. A max value to set attribute is capped at 64. What: /sys/bus/dsa/devices/engine./group_id @@ -232,3 +232,26 @@ Date: Oct 25, 2019 KernelVersion: 5.6.0 Contact: dmaengine@vger.kernel.org Description: The group that this engine belongs to. + +What: /sys/bus/dsa/devices/group./use_read_buffer_limit +Date: Dec 10, 2021 +KernelVersion: 5.17.0 +Contact: dmaengine@vger.kernel.org +Description: Enable the use of global read buffer limit for the group. See DSA + spec v1.2 9.2.18 GRPCFG Use Global Read Buffer Limit. + +What: /sys/bus/dsa/devices/group./read_buffers_allowed +Date: Dec 10, 2021 +KernelVersion: 5.17.0 +Contact: dmaengine@vger.kernel.org +Description: Indicates max number of read buffers that may be in use at one time + by all engines in the group. See DSA spec v1.2 9.2.18 GRPCFG Read + Buffers Allowed. + +What: /sys/bus/dsa/devices/group./read_buffers_reserved +Date: Dec 10, 2021 +KernelVersion: 5.17.0 +Contact: dmaengine@vger.kernel.org +Description: Indicates the number of Read Buffers reserved for the use of + engines in the group. See DSA spec v1.2 9.2.18 GRPCFG Read Buffers + Reserved. diff --git a/drivers/dma/idxd/sysfs.c b/drivers/dma/idxd/sysfs.c index 6f1ebf08878a..7e19ab92b61a 100644 --- a/drivers/dma/idxd/sysfs.c +++ b/drivers/dma/idxd/sysfs.c @@ -112,18 +112,26 @@ static void idxd_set_free_rdbufs(struct idxd_device *idxd) idxd->nr_rdbufs = idxd->max_rdbufs - rdbufs; } -static ssize_t group_tokens_reserved_show(struct device *dev, - struct device_attribute *attr, - char *buf) +static ssize_t group_read_buffers_reserved_show(struct device *dev, + struct device_attribute *attr, + char *buf) { struct idxd_group *group = confdev_to_group(dev); return sysfs_emit(buf, "%u\n", group->rdbufs_reserved); } -static ssize_t group_tokens_reserved_store(struct device *dev, - struct device_attribute *attr, - const char *buf, size_t count) +static ssize_t group_tokens_reserved_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + dev_warn_once(dev, "attribute deprecated, see read_buffers_reserved.\n"); + return group_read_buffers_reserved_show(dev, attr, buf); +} + +static ssize_t group_read_buffers_reserved_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) { struct idxd_group *group = confdev_to_group(dev); struct idxd_device *idxd = group->idxd; @@ -154,22 +162,42 @@ static ssize_t group_tokens_reserved_store(struct device *dev, return count; } +static ssize_t group_tokens_reserved_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + dev_warn_once(dev, "attribute deprecated, see read_buffers_reserved.\n"); + return group_read_buffers_reserved_store(dev, attr, buf, count); +} + static struct device_attribute dev_attr_group_tokens_reserved = __ATTR(tokens_reserved, 0644, group_tokens_reserved_show, group_tokens_reserved_store); -static ssize_t group_tokens_allowed_show(struct device *dev, - struct device_attribute *attr, - char *buf) +static struct device_attribute dev_attr_group_read_buffers_reserved = + __ATTR(read_buffers_reserved, 0644, group_read_buffers_reserved_show, + group_read_buffers_reserved_store); + +static ssize_t group_read_buffers_allowed_show(struct device *dev, + struct device_attribute *attr, + char *buf) { struct idxd_group *group = confdev_to_group(dev); return sysfs_emit(buf, "%u\n", group->rdbufs_allowed); } -static ssize_t group_tokens_allowed_store(struct device *dev, - struct device_attribute *attr, - const char *buf, size_t count) +static ssize_t group_tokens_allowed_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + dev_warn_once(dev, "attribute deprecated, see read_buffers_allowed.\n"); + return group_read_buffers_allowed_show(dev, attr, buf); +} + +static ssize_t group_read_buffers_allowed_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) { struct idxd_group *group = confdev_to_group(dev); struct idxd_device *idxd = group->idxd; @@ -197,22 +225,42 @@ static ssize_t group_tokens_allowed_store(struct device *dev, return count; } +static ssize_t group_tokens_allowed_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + dev_warn_once(dev, "attribute deprecated, see read_buffers_allowed.\n"); + return group_read_buffers_allowed_store(dev, attr, buf, count); +} + static struct device_attribute dev_attr_group_tokens_allowed = __ATTR(tokens_allowed, 0644, group_tokens_allowed_show, group_tokens_allowed_store); -static ssize_t group_use_token_limit_show(struct device *dev, - struct device_attribute *attr, - char *buf) +static struct device_attribute dev_attr_group_read_buffers_allowed = + __ATTR(read_buffers_allowed, 0644, group_read_buffers_allowed_show, + group_read_buffers_allowed_store); + +static ssize_t group_use_read_buffer_limit_show(struct device *dev, + struct device_attribute *attr, + char *buf) { struct idxd_group *group = confdev_to_group(dev); return sysfs_emit(buf, "%u\n", group->use_rdbuf_limit); } -static ssize_t group_use_token_limit_store(struct device *dev, - struct device_attribute *attr, - const char *buf, size_t count) +static ssize_t group_use_token_limit_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + dev_warn_once(dev, "attribute deprecated, see use_read_buffer_limit.\n"); + return group_use_read_buffer_limit_show(dev, attr, buf); +} + +static ssize_t group_use_read_buffer_limit_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) { struct idxd_group *group = confdev_to_group(dev); struct idxd_device *idxd = group->idxd; @@ -239,10 +287,22 @@ static ssize_t group_use_token_limit_store(struct device *dev, return count; } +static ssize_t group_use_token_limit_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + dev_warn_once(dev, "attribute deprecated, see use_read_buffer_limit.\n"); + return group_use_read_buffer_limit_store(dev, attr, buf, count); +} + static struct device_attribute dev_attr_group_use_token_limit = __ATTR(use_token_limit, 0644, group_use_token_limit_show, group_use_token_limit_store); +static struct device_attribute dev_attr_group_use_read_buffer_limit = + __ATTR(use_read_buffer_limit, 0644, group_use_read_buffer_limit_show, + group_use_read_buffer_limit_store); + static ssize_t group_engines_show(struct device *dev, struct device_attribute *attr, char *buf) { @@ -387,8 +447,11 @@ static struct attribute *idxd_group_attributes[] = { &dev_attr_group_work_queues.attr, &dev_attr_group_engines.attr, &dev_attr_group_use_token_limit.attr, + &dev_attr_group_use_read_buffer_limit.attr, &dev_attr_group_tokens_allowed.attr, + &dev_attr_group_read_buffers_allowed.attr, &dev_attr_group_tokens_reserved.attr, + &dev_attr_group_read_buffers_reserved.attr, &dev_attr_group_traffic_class_a.attr, &dev_attr_group_traffic_class_b.attr, NULL, @@ -1192,26 +1255,42 @@ static ssize_t errors_show(struct device *dev, } static DEVICE_ATTR_RO(errors); -static ssize_t max_tokens_show(struct device *dev, - struct device_attribute *attr, char *buf) +static ssize_t max_read_buffers_show(struct device *dev, + struct device_attribute *attr, char *buf) { struct idxd_device *idxd = confdev_to_idxd(dev); return sysfs_emit(buf, "%u\n", idxd->max_rdbufs); } -static DEVICE_ATTR_RO(max_tokens); -static ssize_t token_limit_show(struct device *dev, - struct device_attribute *attr, char *buf) +static ssize_t max_tokens_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + dev_warn_once(dev, "attribute deprecated, see max_read_buffers.\n"); + return max_read_buffers_show(dev, attr, buf); +} + +static DEVICE_ATTR_RO(max_tokens); /* deprecated */ +static DEVICE_ATTR_RO(max_read_buffers); + +static ssize_t read_buffer_limit_show(struct device *dev, + struct device_attribute *attr, char *buf) { struct idxd_device *idxd = confdev_to_idxd(dev); return sysfs_emit(buf, "%u\n", idxd->rdbuf_limit); } -static ssize_t token_limit_store(struct device *dev, - struct device_attribute *attr, - const char *buf, size_t count) +static ssize_t token_limit_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + dev_warn_once(dev, "attribute deprecated, see read_buffer_limit.\n"); + return read_buffer_limit_show(dev, attr, buf); +} + +static ssize_t read_buffer_limit_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) { struct idxd_device *idxd = confdev_to_idxd(dev); unsigned long val; @@ -1236,7 +1315,17 @@ static ssize_t token_limit_store(struct device *dev, idxd->rdbuf_limit = val; return count; } -static DEVICE_ATTR_RW(token_limit); + +static ssize_t token_limit_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + dev_warn_once(dev, "attribute deprecated, see read_buffer_limit\n"); + return read_buffer_limit_store(dev, attr, buf, count); +} + +static DEVICE_ATTR_RW(token_limit); /* deprecated */ +static DEVICE_ATTR_RW(read_buffer_limit); static ssize_t cdev_major_show(struct device *dev, struct device_attribute *attr, char *buf) @@ -1282,7 +1371,9 @@ static struct attribute *idxd_device_attributes[] = { &dev_attr_state.attr, &dev_attr_errors.attr, &dev_attr_max_tokens.attr, + &dev_attr_max_read_buffers.attr, &dev_attr_token_limit.attr, + &dev_attr_read_buffer_limit.attr, &dev_attr_cdev_major.attr, &dev_attr_cmd_status.attr, NULL, From 57d8d3fc060c7337bc78376ccc699ab80162b7d5 Mon Sep 17 00:00:00 2001 From: Clark Wang Date: Mon, 27 Dec 2021 15:45:22 +0800 Subject: [PATCH 198/493] i3c: master: svc: move module reset behind clk enable Reset I3C module will R/W its regs, so enable its clocks first. Signed-off-by: Clark Wang Reviewed-by: Miquel Raynal Reviewed-by: Jun Li Signed-off-by: Alexandre Belloni Link: https://lore.kernel.org/r/20211227074529.1660398-2-xiaoning.wang@nxp.com --- drivers/i3c/master/svc-i3c-master.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/i3c/master/svc-i3c-master.c b/drivers/i3c/master/svc-i3c-master.c index 879e5a64acaf..c25a372f6820 100644 --- a/drivers/i3c/master/svc-i3c-master.c +++ b/drivers/i3c/master/svc-i3c-master.c @@ -1381,8 +1381,6 @@ static int svc_i3c_master_probe(struct platform_device *pdev) master->dev = dev; - svc_i3c_master_reset(master); - ret = clk_prepare_enable(master->pclk); if (ret) return ret; @@ -1419,6 +1417,8 @@ static int svc_i3c_master_probe(struct platform_device *pdev) platform_set_drvdata(pdev, master); + svc_i3c_master_reset(master); + /* Register the master */ ret = i3c_master_register(&master->base, &pdev->dev, &svc_i3c_master_ops, false); From a84a9222b2be2949f11f2d7c487052ac2afed4d4 Mon Sep 17 00:00:00 2001 From: Clark Wang Date: Mon, 27 Dec 2021 15:45:23 +0800 Subject: [PATCH 199/493] i3c: master: svc: fix atomic issue do_daa_locked() function is in a spin lock environment, use readl_poll_timeout_atomic() to replace the origin readl_poll_timeout(). Signed-off-by: Clark Wang Reviewed-by: Miquel Raynal Reviewed-by: Jun Li Signed-off-by: Alexandre Belloni Link: https://lore.kernel.org/r/20211227074529.1660398-3-xiaoning.wang@nxp.com --- drivers/i3c/master/svc-i3c-master.c | 26 +++++++++++++++----------- 1 file changed, 15 insertions(+), 11 deletions(-) diff --git a/drivers/i3c/master/svc-i3c-master.c b/drivers/i3c/master/svc-i3c-master.c index c25a372f6820..47c02a60cf62 100644 --- a/drivers/i3c/master/svc-i3c-master.c +++ b/drivers/i3c/master/svc-i3c-master.c @@ -656,8 +656,10 @@ static int svc_i3c_master_readb(struct svc_i3c_master *master, u8 *dst, u32 reg; for (i = 0; i < len; i++) { - ret = readl_poll_timeout(master->regs + SVC_I3C_MSTATUS, reg, - SVC_I3C_MSTATUS_RXPEND(reg), 0, 1000); + ret = readl_poll_timeout_atomic(master->regs + SVC_I3C_MSTATUS, + reg, + SVC_I3C_MSTATUS_RXPEND(reg), + 0, 1000); if (ret) return ret; @@ -687,10 +689,11 @@ static int svc_i3c_master_do_daa_locked(struct svc_i3c_master *master, * Either one slave will send its ID, or the assignment process * is done. */ - ret = readl_poll_timeout(master->regs + SVC_I3C_MSTATUS, reg, - SVC_I3C_MSTATUS_RXPEND(reg) | - SVC_I3C_MSTATUS_MCTRLDONE(reg), - 1, 1000); + ret = readl_poll_timeout_atomic(master->regs + SVC_I3C_MSTATUS, + reg, + SVC_I3C_MSTATUS_RXPEND(reg) | + SVC_I3C_MSTATUS_MCTRLDONE(reg), + 1, 1000); if (ret) return ret; @@ -744,11 +747,12 @@ static int svc_i3c_master_do_daa_locked(struct svc_i3c_master *master, } /* Wait for the slave to be ready to receive its address */ - ret = readl_poll_timeout(master->regs + SVC_I3C_MSTATUS, reg, - SVC_I3C_MSTATUS_MCTRLDONE(reg) && - SVC_I3C_MSTATUS_STATE_DAA(reg) && - SVC_I3C_MSTATUS_BETWEEN(reg), - 0, 1000); + ret = readl_poll_timeout_atomic(master->regs + SVC_I3C_MSTATUS, + reg, + SVC_I3C_MSTATUS_MCTRLDONE(reg) && + SVC_I3C_MSTATUS_STATE_DAA(reg) && + SVC_I3C_MSTATUS_BETWEEN(reg), + 0, 1000); if (ret) return ret; From 9fd6b5ce8523460b024361a802f5e5738d2da543 Mon Sep 17 00:00:00 2001 From: Clark Wang Date: Mon, 27 Dec 2021 15:45:24 +0800 Subject: [PATCH 200/493] i3c: master: svc: separate err, fifo and disable interrupt of reset function Sometimes only need to reset err and fifo regs, so split the origin reset function to three functions. Put them at the top of the file, to let more functions can call them. Signed-off-by: Clark Wang Reviewed-by: Miquel Raynal Reviewed-by: Jun Li Signed-off-by: Alexandre Belloni Link: https://lore.kernel.org/r/20211227074529.1660398-4-xiaoning.wang@nxp.com --- drivers/i3c/master/svc-i3c-master.c | 59 +++++++++++++++++------------ 1 file changed, 34 insertions(+), 25 deletions(-) diff --git a/drivers/i3c/master/svc-i3c-master.c b/drivers/i3c/master/svc-i3c-master.c index 47c02a60cf62..4e69c691253d 100644 --- a/drivers/i3c/master/svc-i3c-master.c +++ b/drivers/i3c/master/svc-i3c-master.c @@ -236,6 +236,40 @@ static void svc_i3c_master_disable_interrupts(struct svc_i3c_master *master) writel(mask, master->regs + SVC_I3C_MINTCLR); } +static void svc_i3c_master_clear_merrwarn(struct svc_i3c_master *master) +{ + /* Clear pending warnings */ + writel(readl(master->regs + SVC_I3C_MERRWARN), + master->regs + SVC_I3C_MERRWARN); +} + +static void svc_i3c_master_flush_fifo(struct svc_i3c_master *master) +{ + /* Flush FIFOs */ + writel(SVC_I3C_MDATACTRL_FLUSHTB | SVC_I3C_MDATACTRL_FLUSHRB, + master->regs + SVC_I3C_MDATACTRL); +} + +static void svc_i3c_master_reset_fifo_trigger(struct svc_i3c_master *master) +{ + u32 reg; + + /* Set RX and TX tigger levels, flush FIFOs */ + reg = SVC_I3C_MDATACTRL_FLUSHTB | + SVC_I3C_MDATACTRL_FLUSHRB | + SVC_I3C_MDATACTRL_UNLOCK_TRIG | + SVC_I3C_MDATACTRL_TXTRIG_FIFO_NOT_FULL | + SVC_I3C_MDATACTRL_RXTRIG_FIFO_NOT_EMPTY; + writel(reg, master->regs + SVC_I3C_MDATACTRL); +} + +static void svc_i3c_master_reset(struct svc_i3c_master *master) +{ + svc_i3c_master_clear_merrwarn(master); + svc_i3c_master_reset_fifo_trigger(master); + svc_i3c_master_disable_interrupts(master); +} + static inline struct svc_i3c_master * to_svc_i3c_master(struct i3c_master_controller *master) { @@ -279,12 +313,6 @@ static void svc_i3c_master_emit_stop(struct svc_i3c_master *master) udelay(1); } -static void svc_i3c_master_clear_merrwarn(struct svc_i3c_master *master) -{ - writel(readl(master->regs + SVC_I3C_MERRWARN), - master->regs + SVC_I3C_MERRWARN); -} - static int svc_i3c_master_handle_ibi(struct svc_i3c_master *master, struct i3c_dev_desc *dev) { @@ -1334,25 +1362,6 @@ static const struct i3c_master_controller_ops svc_i3c_master_ops = { .disable_ibi = svc_i3c_master_disable_ibi, }; -static void svc_i3c_master_reset(struct svc_i3c_master *master) -{ - u32 reg; - - /* Clear pending warnings */ - writel(readl(master->regs + SVC_I3C_MERRWARN), - master->regs + SVC_I3C_MERRWARN); - - /* Set RX and TX tigger levels, flush FIFOs */ - reg = SVC_I3C_MDATACTRL_FLUSHTB | - SVC_I3C_MDATACTRL_FLUSHRB | - SVC_I3C_MDATACTRL_UNLOCK_TRIG | - SVC_I3C_MDATACTRL_TXTRIG_FIFO_NOT_FULL | - SVC_I3C_MDATACTRL_RXTRIG_FIFO_NOT_EMPTY; - writel(reg, master->regs + SVC_I3C_MDATACTRL); - - svc_i3c_master_disable_interrupts(master); -} - static int svc_i3c_master_probe(struct platform_device *pdev) { struct device *dev = &pdev->dev; From d5e512574dd2eb06ace859b27cafb0de41743bb5 Mon Sep 17 00:00:00 2001 From: Clark Wang Date: Mon, 27 Dec 2021 15:45:25 +0800 Subject: [PATCH 201/493] i3c: master: svc: add support for slave to stop returning data When i3c controller reads data from slave device, slave device can stop returning data with an ACK after any byte. Add this support for svc i3c controller. Otherwise, it will timeout when the slave device ends the read operation early. Signed-off-by: Clark Wang Reviewed-by: Jun Li Reviewed-by: Miquel Raynal Signed-off-by: Alexandre Belloni Link: https://lore.kernel.org/r/20211227074529.1660398-5-xiaoning.wang@nxp.com --- drivers/i3c/master/svc-i3c-master.c | 56 ++++++++++++++++++++--------- 1 file changed, 40 insertions(+), 16 deletions(-) diff --git a/drivers/i3c/master/svc-i3c-master.c b/drivers/i3c/master/svc-i3c-master.c index 4e69c691253d..74b38772d692 100644 --- a/drivers/i3c/master/svc-i3c-master.c +++ b/drivers/i3c/master/svc-i3c-master.c @@ -896,27 +896,35 @@ emit_stop: static int svc_i3c_master_read(struct svc_i3c_master *master, u8 *in, unsigned int len) { - int offset = 0, i, ret; - u32 mdctrl; + int offset = 0, i; + u32 mdctrl, mstatus; + bool completed = false; + unsigned int count; + unsigned long start = jiffies; - while (offset < len) { - unsigned int count; + while (!completed) { + mstatus = readl(master->regs + SVC_I3C_MSTATUS); + if (SVC_I3C_MSTATUS_COMPLETE(mstatus) != 0) + completed = true; - ret = readl_poll_timeout(master->regs + SVC_I3C_MDATACTRL, - mdctrl, - !(mdctrl & SVC_I3C_MDATACTRL_RXEMPTY), - 0, 1000); - if (ret) - return ret; + if (time_after(jiffies, start + msecs_to_jiffies(1000))) { + dev_dbg(master->dev, "I3C read timeout\n"); + return -ETIMEDOUT; + } + mdctrl = readl(master->regs + SVC_I3C_MDATACTRL); count = SVC_I3C_MDATACTRL_RXCOUNT(mdctrl); + if (offset + count > len) { + dev_err(master->dev, "I3C receive length too long!\n"); + return -EINVAL; + } for (i = 0; i < count; i++) in[offset + i] = readl(master->regs + SVC_I3C_MRDATAB); offset += count; } - return 0; + return offset; } static int svc_i3c_master_write(struct svc_i3c_master *master, @@ -949,7 +957,7 @@ static int svc_i3c_master_write(struct svc_i3c_master *master, static int svc_i3c_master_xfer(struct svc_i3c_master *master, bool rnw, unsigned int xfer_type, u8 addr, u8 *in, const u8 *out, unsigned int xfer_len, - unsigned int read_len, bool continued) + unsigned int *read_len, bool continued) { u32 reg; int ret; @@ -959,7 +967,7 @@ static int svc_i3c_master_xfer(struct svc_i3c_master *master, SVC_I3C_MCTRL_IBIRESP_NACK | SVC_I3C_MCTRL_DIR(rnw) | SVC_I3C_MCTRL_ADDR(addr) | - SVC_I3C_MCTRL_RDTERM(read_len), + SVC_I3C_MCTRL_RDTERM(*read_len), master->regs + SVC_I3C_MCTRL); ret = readl_poll_timeout(master->regs + SVC_I3C_MSTATUS, reg, @@ -971,17 +979,27 @@ static int svc_i3c_master_xfer(struct svc_i3c_master *master, ret = svc_i3c_master_read(master, in, xfer_len); else ret = svc_i3c_master_write(master, out, xfer_len); - if (ret) + if (ret < 0) goto emit_stop; + if (rnw) + *read_len = ret; + ret = readl_poll_timeout(master->regs + SVC_I3C_MSTATUS, reg, SVC_I3C_MSTATUS_COMPLETE(reg), 0, 1000); if (ret) goto emit_stop; - if (!continued) + writel(SVC_I3C_MINT_COMPLETE, master->regs + SVC_I3C_MSTATUS); + + if (!continued) { svc_i3c_master_emit_stop(master); + /* Wait idle if stop is sent. */ + readl_poll_timeout(master->regs + SVC_I3C_MSTATUS, reg, + SVC_I3C_MSTATUS_STATE_IDLE(reg), 0, 1000); + } + return 0; emit_stop: @@ -1039,12 +1057,15 @@ static void svc_i3c_master_start_xfer_locked(struct svc_i3c_master *master) if (!xfer) return; + svc_i3c_master_clear_merrwarn(master); + svc_i3c_master_flush_fifo(master); + for (i = 0; i < xfer->ncmds; i++) { struct svc_i3c_cmd *cmd = &xfer->cmds[i]; ret = svc_i3c_master_xfer(master, cmd->rnw, xfer->type, cmd->addr, cmd->in, cmd->out, - cmd->len, cmd->read_len, + cmd->len, &cmd->read_len, cmd->continued); if (ret) break; @@ -1173,6 +1194,9 @@ static int svc_i3c_master_send_direct_ccc_cmd(struct svc_i3c_master *master, if (!wait_for_completion_timeout(&xfer->comp, msecs_to_jiffies(1000))) svc_i3c_master_dequeue_xfer(master, xfer); + if (cmd->read_len != xfer_len) + ccc->dests[0].payload.len = cmd->read_len; + ret = xfer->ret; svc_i3c_master_free_xfer(xfer); From 173fcb27210b18b38f1080f1c8f806e02cf8a53b Mon Sep 17 00:00:00 2001 From: Clark Wang Date: Mon, 27 Dec 2021 15:45:26 +0800 Subject: [PATCH 202/493] i3c: master: svc: set ODSTOP to let I2C device see the STOP signal If using I2C/I3C mixed mode, need to set ODSTOP. Otherwise, the I2C devices cannot see the stop signal. It may cause message sending errors. Signed-off-by: Clark Wang Reviewed-by: Miquel Raynal Reviewed-by: Jun Li Signed-off-by: Alexandre Belloni Link: https://lore.kernel.org/r/20211227074529.1660398-6-xiaoning.wang@nxp.com --- drivers/i3c/master/svc-i3c-master.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/i3c/master/svc-i3c-master.c b/drivers/i3c/master/svc-i3c-master.c index 74b38772d692..bc9c7fd69cbe 100644 --- a/drivers/i3c/master/svc-i3c-master.c +++ b/drivers/i3c/master/svc-i3c-master.c @@ -477,7 +477,7 @@ static int svc_i3c_master_bus_init(struct i3c_master_controller *m) struct i3c_device_info info = {}; unsigned long fclk_rate, fclk_period_ns; unsigned int high_period_ns, od_low_period_ns; - u32 ppbaud, pplow, odhpp, odbaud, i2cbaud, reg; + u32 ppbaud, pplow, odhpp, odbaud, odstop, i2cbaud, reg; int ret; /* Timings derivation */ @@ -507,6 +507,7 @@ static int svc_i3c_master_bus_init(struct i3c_master_controller *m) switch (bus->mode) { case I3C_BUS_MODE_PURE: i2cbaud = 0; + odstop = 0; break; case I3C_BUS_MODE_MIXED_FAST: case I3C_BUS_MODE_MIXED_LIMITED: @@ -515,6 +516,7 @@ static int svc_i3c_master_bus_init(struct i3c_master_controller *m) * between the high and low period does not really matter. */ i2cbaud = DIV_ROUND_UP(1000, od_low_period_ns) - 2; + odstop = 1; break; case I3C_BUS_MODE_MIXED_SLOW: /* @@ -522,6 +524,7 @@ static int svc_i3c_master_bus_init(struct i3c_master_controller *m) * constraints as the FM+ mode. */ i2cbaud = DIV_ROUND_UP(2500, od_low_period_ns) - 2; + odstop = 1; break; default: return -EINVAL; @@ -530,7 +533,7 @@ static int svc_i3c_master_bus_init(struct i3c_master_controller *m) reg = SVC_I3C_MCONFIG_MASTER_EN | SVC_I3C_MCONFIG_DISTO(0) | SVC_I3C_MCONFIG_HKEEP(0) | - SVC_I3C_MCONFIG_ODSTOP(0) | + SVC_I3C_MCONFIG_ODSTOP(odstop) | SVC_I3C_MCONFIG_PPBAUD(ppbaud) | SVC_I3C_MCONFIG_PPLOW(pplow) | SVC_I3C_MCONFIG_ODBAUD(odbaud) | From 05be23ef78f76a741d529226a8764d81c719a1e3 Mon Sep 17 00:00:00 2001 From: Clark Wang Date: Mon, 27 Dec 2021 15:45:27 +0800 Subject: [PATCH 203/493] i3c: master: svc: add runtime pm support Add runtime pm support to dynamically manage the clock. Signed-off-by: Clark Wang Reviewed-by: Miquel Raynal Reviewed-by: Jun Li Signed-off-by: Alexandre Belloni Link: https://lore.kernel.org/r/20211227074529.1660398-7-xiaoning.wang@nxp.com --- drivers/i3c/master/svc-i3c-master.c | 196 ++++++++++++++++++++++------ 1 file changed, 156 insertions(+), 40 deletions(-) diff --git a/drivers/i3c/master/svc-i3c-master.c b/drivers/i3c/master/svc-i3c-master.c index bc9c7fd69cbe..884f5349fb76 100644 --- a/drivers/i3c/master/svc-i3c-master.c +++ b/drivers/i3c/master/svc-i3c-master.c @@ -17,7 +17,9 @@ #include #include #include +#include #include +#include /* Master Mode Registers */ #define SVC_I3C_MCONFIG 0x000 @@ -119,6 +121,7 @@ #define SVC_MDYNADDR_ADDR(x) FIELD_PREP(GENMASK(7, 1), (x)) #define SVC_I3C_MAX_DEVS 32 +#define SVC_I3C_PM_TIMEOUT_MS 1000 /* This parameter depends on the implementation and may be tuned */ #define SVC_I3C_FIFO_SIZE 16 @@ -480,10 +483,20 @@ static int svc_i3c_master_bus_init(struct i3c_master_controller *m) u32 ppbaud, pplow, odhpp, odbaud, odstop, i2cbaud, reg; int ret; + ret = pm_runtime_resume_and_get(master->dev); + if (ret < 0) { + dev_err(master->dev, + "<%s> cannot resume i3c bus master, err: %d\n", + __func__, ret); + return ret; + } + /* Timings derivation */ fclk_rate = clk_get_rate(master->fclk); - if (!fclk_rate) - return -EINVAL; + if (!fclk_rate) { + ret = -EINVAL; + goto rpm_out; + } fclk_period_ns = DIV_ROUND_UP(1000000000, fclk_rate); @@ -527,7 +540,7 @@ static int svc_i3c_master_bus_init(struct i3c_master_controller *m) odstop = 1; break; default: - return -EINVAL; + goto rpm_out; } reg = SVC_I3C_MCONFIG_MASTER_EN | @@ -545,7 +558,7 @@ static int svc_i3c_master_bus_init(struct i3c_master_controller *m) /* Master core's registration */ ret = i3c_master_get_free_addr(m, 0); if (ret < 0) - return ret; + goto rpm_out; info.dyn_addr = ret; @@ -554,21 +567,35 @@ static int svc_i3c_master_bus_init(struct i3c_master_controller *m) ret = i3c_master_set_info(&master->base, &info); if (ret) - return ret; + goto rpm_out; svc_i3c_master_enable_interrupts(master, SVC_I3C_MINT_SLVSTART); - return 0; +rpm_out: + pm_runtime_mark_last_busy(master->dev); + pm_runtime_put_autosuspend(master->dev); + + return ret; } static void svc_i3c_master_bus_cleanup(struct i3c_master_controller *m) { struct svc_i3c_master *master = to_svc_i3c_master(m); + int ret; + + ret = pm_runtime_resume_and_get(master->dev); + if (ret < 0) { + dev_err(master->dev, "<%s> Cannot get runtime PM.\n", __func__); + return; + } svc_i3c_master_disable_interrupts(master); /* Disable master */ writel(0, master->regs + SVC_I3C_MCONFIG); + + pm_runtime_mark_last_busy(master->dev); + pm_runtime_put_autosuspend(master->dev); } static int svc_i3c_master_reserve_slot(struct svc_i3c_master *master) @@ -867,31 +894,36 @@ static int svc_i3c_master_do_daa(struct i3c_master_controller *m) unsigned int dev_nb; int ret, i; + ret = pm_runtime_resume_and_get(master->dev); + if (ret < 0) { + dev_err(master->dev, "<%s> Cannot get runtime PM.\n", __func__); + return ret; + } + spin_lock_irqsave(&master->xferqueue.lock, flags); ret = svc_i3c_master_do_daa_locked(master, addrs, &dev_nb); spin_unlock_irqrestore(&master->xferqueue.lock, flags); - if (ret) - goto emit_stop; + if (ret) { + svc_i3c_master_emit_stop(master); + svc_i3c_master_clear_merrwarn(master); + goto rpm_out; + } /* Register all devices who participated to the core */ for (i = 0; i < dev_nb; i++) { ret = i3c_master_add_i3c_dev_locked(m, addrs[i]); if (ret) - return ret; + goto rpm_out; } /* Configure IBI auto-rules */ ret = svc_i3c_update_ibirules(master); - if (ret) { + if (ret) dev_err(master->dev, "Cannot handle such a list of devices"); - return ret; - } - return 0; - -emit_stop: - svc_i3c_master_emit_stop(master); - svc_i3c_master_clear_merrwarn(master); +rpm_out: + pm_runtime_mark_last_busy(master->dev); + pm_runtime_put_autosuspend(master->dev); return ret; } @@ -1060,6 +1092,12 @@ static void svc_i3c_master_start_xfer_locked(struct svc_i3c_master *master) if (!xfer) return; + ret = pm_runtime_resume_and_get(master->dev); + if (ret < 0) { + dev_err(master->dev, "<%s> Cannot get runtime PM.\n", __func__); + return; + } + svc_i3c_master_clear_merrwarn(master); svc_i3c_master_flush_fifo(master); @@ -1074,6 +1112,9 @@ static void svc_i3c_master_start_xfer_locked(struct svc_i3c_master *master) break; } + pm_runtime_mark_last_busy(master->dev); + pm_runtime_put_autosuspend(master->dev); + xfer->ret = ret; complete(&xfer->comp); @@ -1350,6 +1391,14 @@ static void svc_i3c_master_free_ibi(struct i3c_dev_desc *dev) static int svc_i3c_master_enable_ibi(struct i3c_dev_desc *dev) { struct i3c_master_controller *m = i3c_dev_get_master(dev); + struct svc_i3c_master *master = to_svc_i3c_master(m); + int ret; + + ret = pm_runtime_resume_and_get(master->dev); + if (ret < 0) { + dev_err(master->dev, "<%s> Cannot get runtime PM.\n", __func__); + return ret; + } return i3c_master_enec_locked(m, dev->info.dyn_addr, I3C_CCC_EVENT_SIR); } @@ -1357,8 +1406,15 @@ static int svc_i3c_master_enable_ibi(struct i3c_dev_desc *dev) static int svc_i3c_master_disable_ibi(struct i3c_dev_desc *dev) { struct i3c_master_controller *m = i3c_dev_get_master(dev); + struct svc_i3c_master *master = to_svc_i3c_master(m); + int ret; - return i3c_master_disec_locked(m, dev->info.dyn_addr, I3C_CCC_EVENT_SIR); + ret = i3c_master_disec_locked(m, dev->info.dyn_addr, I3C_CCC_EVENT_SIR); + + pm_runtime_mark_last_busy(master->dev); + pm_runtime_put_autosuspend(master->dev); + + return ret; } static void svc_i3c_master_recycle_ibi_slot(struct i3c_dev_desc *dev, @@ -1389,6 +1445,37 @@ static const struct i3c_master_controller_ops svc_i3c_master_ops = { .disable_ibi = svc_i3c_master_disable_ibi, }; +static int svc_i3c_master_prepare_clks(struct svc_i3c_master *master) +{ + int ret = 0; + + ret = clk_prepare_enable(master->pclk); + if (ret) + return ret; + + ret = clk_prepare_enable(master->fclk); + if (ret) { + clk_disable_unprepare(master->pclk); + return ret; + } + + ret = clk_prepare_enable(master->sclk); + if (ret) { + clk_disable_unprepare(master->pclk); + clk_disable_unprepare(master->fclk); + return ret; + } + + return 0; +} + +static void svc_i3c_master_unprepare_clks(struct svc_i3c_master *master) +{ + clk_disable_unprepare(master->pclk); + clk_disable_unprepare(master->fclk); + clk_disable_unprepare(master->sclk); +} + static int svc_i3c_master_probe(struct platform_device *pdev) { struct device *dev = &pdev->dev; @@ -1421,24 +1508,16 @@ static int svc_i3c_master_probe(struct platform_device *pdev) master->dev = dev; - ret = clk_prepare_enable(master->pclk); + ret = svc_i3c_master_prepare_clks(master); if (ret) return ret; - ret = clk_prepare_enable(master->fclk); - if (ret) - goto err_disable_pclk; - - ret = clk_prepare_enable(master->sclk); - if (ret) - goto err_disable_fclk; - INIT_WORK(&master->hj_work, svc_i3c_master_hj_work); INIT_WORK(&master->ibi_work, svc_i3c_master_ibi_work); ret = devm_request_irq(dev, master->irq, svc_i3c_master_irq_handler, IRQF_NO_SUSPEND, "svc-i3c-irq", master); if (ret) - goto err_disable_sclk; + goto err_disable_clks; master->free_slots = GENMASK(SVC_I3C_MAX_DEVS - 1, 0); @@ -1452,29 +1531,38 @@ static int svc_i3c_master_probe(struct platform_device *pdev) GFP_KERNEL); if (!master->ibi.slots) { ret = -ENOMEM; - goto err_disable_sclk; + goto err_disable_clks; } platform_set_drvdata(pdev, master); + pm_runtime_set_autosuspend_delay(&pdev->dev, SVC_I3C_PM_TIMEOUT_MS); + pm_runtime_use_autosuspend(&pdev->dev); + pm_runtime_get_noresume(&pdev->dev); + pm_runtime_set_active(&pdev->dev); + pm_runtime_enable(&pdev->dev); + svc_i3c_master_reset(master); /* Register the master */ ret = i3c_master_register(&master->base, &pdev->dev, &svc_i3c_master_ops, false); if (ret) - goto err_disable_sclk; + goto rpm_disable; + + pm_runtime_mark_last_busy(&pdev->dev); + pm_runtime_put_autosuspend(&pdev->dev); return 0; -err_disable_sclk: - clk_disable_unprepare(master->sclk); +rpm_disable: + pm_runtime_dont_use_autosuspend(&pdev->dev); + pm_runtime_put_noidle(&pdev->dev); + pm_runtime_set_suspended(&pdev->dev); + pm_runtime_disable(&pdev->dev); -err_disable_fclk: - clk_disable_unprepare(master->fclk); - -err_disable_pclk: - clk_disable_unprepare(master->pclk); +err_disable_clks: + svc_i3c_master_unprepare_clks(master); return ret; } @@ -1488,13 +1576,40 @@ static int svc_i3c_master_remove(struct platform_device *pdev) if (ret) return ret; - clk_disable_unprepare(master->pclk); - clk_disable_unprepare(master->fclk); - clk_disable_unprepare(master->sclk); + pm_runtime_dont_use_autosuspend(&pdev->dev); + pm_runtime_disable(&pdev->dev); return 0; } +static int __maybe_unused svc_i3c_runtime_suspend(struct device *dev) +{ + struct svc_i3c_master *master = dev_get_drvdata(dev); + + svc_i3c_master_unprepare_clks(master); + pinctrl_pm_select_sleep_state(dev); + + return 0; +} + +static int __maybe_unused svc_i3c_runtime_resume(struct device *dev) +{ + struct svc_i3c_master *master = dev_get_drvdata(dev); + int ret = 0; + + pinctrl_pm_select_default_state(dev); + svc_i3c_master_prepare_clks(master); + + return ret; +} + +static const struct dev_pm_ops svc_i3c_pm_ops = { + SET_NOIRQ_SYSTEM_SLEEP_PM_OPS(pm_runtime_force_suspend, + pm_runtime_force_resume) + SET_RUNTIME_PM_OPS(svc_i3c_runtime_suspend, + svc_i3c_runtime_resume, NULL) +}; + static const struct of_device_id svc_i3c_master_of_match_tbl[] = { { .compatible = "silvaco,i3c-master" }, { /* sentinel */ }, @@ -1506,6 +1621,7 @@ static struct platform_driver svc_i3c_master = { .driver = { .name = "silvaco-i3c-master", .of_match_table = svc_i3c_master_of_match_tbl, + .pm = &svc_i3c_pm_ops, }, }; module_platform_driver(svc_i3c_master); From c5d4587bb9a9a03b30bbc928b4a007fcd1f8c279 Mon Sep 17 00:00:00 2001 From: Clark Wang Date: Mon, 27 Dec 2021 15:45:28 +0800 Subject: [PATCH 204/493] i3c: master: svc: add the missing module device table The missing MODULE_DEVICE_TABLE() will cause the svc-i3c-master cannot be auto probed when it is built in moudle. So add it. Signed-off-by: Clark Wang Reviewed-by: Miquel Raynal Reviewed-by: Jun Li Signed-off-by: Alexandre Belloni Link: https://lore.kernel.org/r/20211227074529.1660398-8-xiaoning.wang@nxp.com --- drivers/i3c/master/svc-i3c-master.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/i3c/master/svc-i3c-master.c b/drivers/i3c/master/svc-i3c-master.c index 884f5349fb76..3bc81ef95334 100644 --- a/drivers/i3c/master/svc-i3c-master.c +++ b/drivers/i3c/master/svc-i3c-master.c @@ -1614,6 +1614,7 @@ static const struct of_device_id svc_i3c_master_of_match_tbl[] = { { .compatible = "silvaco,i3c-master" }, { /* sentinel */ }, }; +MODULE_DEVICE_TABLE(of, svc_i3c_master_of_match_tbl); static struct platform_driver svc_i3c_master = { .probe = svc_i3c_master_probe, From 7ff730ca458e841dbcdc87f264d7afe3eaed525e Mon Sep 17 00:00:00 2001 From: Clark Wang Date: Mon, 27 Dec 2021 15:45:29 +0800 Subject: [PATCH 205/493] i3c: master: svc: enable the interrupt in the enable ibi function If enable interrupt in the svc_i3c_master_bus_init() but do not call enable ibi in the device driver, it will cause a kernel dump in the svc_i3c_master_handle_ibi() when a slave start occurs on the i3c bus, because the data->ibi_pool is not initialized. So only enable the interrupt in svc_i3c_master_enable_ibi() function. Signed-off-by: Clark Wang Reviewed-by: Miquel Raynal Signed-off-by: Alexandre Belloni Link: https://lore.kernel.org/r/20211227074529.1660398-9-xiaoning.wang@nxp.com --- drivers/i3c/master/svc-i3c-master.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/i3c/master/svc-i3c-master.c b/drivers/i3c/master/svc-i3c-master.c index 3bc81ef95334..7550dad64ecf 100644 --- a/drivers/i3c/master/svc-i3c-master.c +++ b/drivers/i3c/master/svc-i3c-master.c @@ -569,8 +569,6 @@ static int svc_i3c_master_bus_init(struct i3c_master_controller *m) if (ret) goto rpm_out; - svc_i3c_master_enable_interrupts(master, SVC_I3C_MINT_SLVSTART); - rpm_out: pm_runtime_mark_last_busy(master->dev); pm_runtime_put_autosuspend(master->dev); @@ -1400,6 +1398,8 @@ static int svc_i3c_master_enable_ibi(struct i3c_dev_desc *dev) return ret; } + svc_i3c_master_enable_interrupts(master, SVC_I3C_MINT_SLVSTART); + return i3c_master_enec_locked(m, dev->info.dyn_addr, I3C_CCC_EVENT_SIR); } @@ -1409,6 +1409,8 @@ static int svc_i3c_master_disable_ibi(struct i3c_dev_desc *dev) struct svc_i3c_master *master = to_svc_i3c_master(m); int ret; + svc_i3c_master_disable_interrupts(master); + ret = i3c_master_disec_locked(m, dev->info.dyn_addr, I3C_CCC_EVENT_SIR); pm_runtime_mark_last_busy(master->dev); From 7a2bccd1a27f0c8fc87e4ed56abd6ea9fa7314a6 Mon Sep 17 00:00:00 2001 From: Lukas Bulwahn Date: Mon, 3 Jan 2022 10:45:04 +0100 Subject: [PATCH 206/493] i3c: master: mipi-i3c-hci: correct the config reference for endianness The referred config BIG_ENDIAN does not exist. The config for the endianness of the CPU architecture is called CPU_BIG_ENDIAN. Correct the config name to the existing config for the endianness. Signed-off-by: Lukas Bulwahn Acked-by: Nicolas Pitre Signed-off-by: Alexandre Belloni Link: https://lore.kernel.org/r/20220103094504.3602-1-lukas.bulwahn@gmail.com --- drivers/i3c/master/mipi-i3c-hci/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/i3c/master/mipi-i3c-hci/core.c b/drivers/i3c/master/mipi-i3c-hci/core.c index 1b73647cc3b1..8c01123dc4ed 100644 --- a/drivers/i3c/master/mipi-i3c-hci/core.c +++ b/drivers/i3c/master/mipi-i3c-hci/core.c @@ -662,7 +662,7 @@ static int i3c_hci_init(struct i3c_hci *hci) /* Make sure our data ordering fits the host's */ regval = reg_read(HC_CONTROL); - if (IS_ENABLED(CONFIG_BIG_ENDIAN)) { + if (IS_ENABLED(CONFIG_CPU_BIG_ENDIAN)) { if (!(regval & HC_CONTROL_DATA_BIG_ENDIAN)) { regval |= HC_CONTROL_DATA_BIG_ENDIAN; reg_write(HC_CONTROL, regval); From 1da9bf73033d9867096c47236c5e04fbf90ca790 Mon Sep 17 00:00:00 2001 From: Sander Vanheule Date: Thu, 18 Nov 2021 17:29:51 +0100 Subject: [PATCH 207/493] dt-bindings: watchdog: Realtek Otto WDT binding Add a binding description for Realtek's watchdog timer as found on several of their MIPS-based SoCs (codenamed Otto), such as the RTL838x, RTL839x, and RTL930x series of switch SoCs. Signed-off-by: Sander Vanheule Reviewed-by: Rob Herring Reviewed-by: Guenter Roeck Link: https://lore.kernel.org/r/d832d5b02976dd2c2674d46778f61e5cfcd9b651.1637252610.git.sander@svanheule.net Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck --- .../bindings/watchdog/realtek,otto-wdt.yaml | 91 +++++++++++++++++++ 1 file changed, 91 insertions(+) create mode 100644 Documentation/devicetree/bindings/watchdog/realtek,otto-wdt.yaml diff --git a/Documentation/devicetree/bindings/watchdog/realtek,otto-wdt.yaml b/Documentation/devicetree/bindings/watchdog/realtek,otto-wdt.yaml new file mode 100644 index 000000000000..11b220a5e0f6 --- /dev/null +++ b/Documentation/devicetree/bindings/watchdog/realtek,otto-wdt.yaml @@ -0,0 +1,91 @@ +# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/watchdog/realtek,otto-wdt.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Realtek Otto watchdog timer + +maintainers: + - Sander Vanheule + +description: | + The timer has two timeout phases. Both phases have a maximum duration of 32 + prescaled clock ticks, which is ca. 43s with a bus clock of 200MHz. The + minimum duration of each phase is one tick. Each phase can trigger an + interrupt, although the phase 2 interrupt will occur with the system reset. + - Phase 1: During this phase, the WDT can be pinged to reset the timeout. + - Phase 2: Starts after phase 1 has timed out, and only serves to give the + system some time to clean up, or notify others that it's going to reset. + During this phase, pinging the WDT has no effect, and a reset is + unavoidable, unless the WDT is disabled. + +allOf: + - $ref: watchdog.yaml# + +properties: + compatible: + enum: + - realtek,rtl8380-wdt + - realtek,rtl8390-wdt + - realtek,rtl9300-wdt + + reg: + maxItems: 1 + + clocks: + maxItems: 1 + + interrupts: + items: + - description: interrupt specifier for pretimeout + - description: interrupt specifier for timeout + + interrupt-names: + items: + - const: phase1 + - const: phase2 + + realtek,reset-mode: + $ref: /schemas/types.yaml#/definitions/string + description: | + Specify how the system is reset after a timeout. Defaults to "cpu" if + left unspecified. + oneOf: + - description: Reset the entire chip + const: soc + - description: | + Reset the CPU and IPsec engine, but leave other peripherals untouched + const: cpu + - description: | + Reset the execution pointer, but don't actually reset any hardware + const: software + +required: + - compatible + - reg + - clocks + - interrupts + +unevaluatedProperties: false + +dependencies: + interrupts: [ interrupt-names ] + +examples: + - | + watchdog: watchdog@3150 { + compatible = "realtek,rtl8380-wdt"; + reg = <0x3150 0xc>; + + realtek,reset-mode = "soc"; + + clocks = <&lxbus_clock>; + timeout-sec = <20>; + + interrupt-parent = <&rtlintc>; + interrupt-names = "phase1", "phase2"; + interrupts = <19>, <18>; + }; + +... From 7d7267ae639d569d74fc549f7c56cb39508c4b21 Mon Sep 17 00:00:00 2001 From: Sander Vanheule Date: Thu, 18 Nov 2021 17:29:52 +0100 Subject: [PATCH 208/493] watchdog: Add Realtek Otto watchdog timer Realtek MIPS SoCs (platform name Otto) have a watchdog timer with pretimeout notifitication support. The WDT can (partially) hard reset, or soft reset the SoC. This driver implements all features as described in the devicetree binding, except the phase2 interrupt, and also functions as a restart handler. The cpu reset mode is considered to be a "warm" restart, since this mode does not reset all peripherals. Being an embedded system though, the "cpu" and "software" modes will still cause the bootloader to run on restart. It is not known how a forced system reset can be disabled on the supported platforms. This means that the phase2 interrupt will only fire at the same time as reset, so implementing phase2 is of little use. Signed-off-by: Sander Vanheule Reviewed-by: Guenter Roeck Link: https://lore.kernel.org/r/6d060bccbdcc709cfa79203485db85aad3c3beb5.1637252610.git.sander@svanheule.net Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck --- MAINTAINERS | 7 + drivers/watchdog/Kconfig | 13 + drivers/watchdog/Makefile | 1 + drivers/watchdog/realtek_otto_wdt.c | 384 ++++++++++++++++++++++++++++ 4 files changed, 405 insertions(+) create mode 100644 drivers/watchdog/realtek_otto_wdt.c diff --git a/MAINTAINERS b/MAINTAINERS index fb18ce7168aa..65595bed625a 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -16142,6 +16142,13 @@ S: Maintained F: include/sound/rt*.h F: sound/soc/codecs/rt* +REALTEK OTTO WATCHDOG +M: Sander Vanheule +L: linux-watchdog@vger.kernel.org +S: Maintained +F: Documentation/devicetree/bindings/watchdog/realtek,otto-wdt.yaml +F: drivers/watchdog/realtek_otto_wdt.c + REALTEK RTL83xx SMI DSA ROUTER CHIPS M: Linus Walleij S: Maintained diff --git a/drivers/watchdog/Kconfig b/drivers/watchdog/Kconfig index 73084e008c2b..bac8901072e2 100644 --- a/drivers/watchdog/Kconfig +++ b/drivers/watchdog/Kconfig @@ -941,6 +941,19 @@ config RTD119X_WATCHDOG Say Y here to include support for the watchdog timer in Realtek RTD1295 SoCs. +config REALTEK_OTTO_WDT + tristate "Realtek Otto MIPS watchdog support" + depends on MACH_REALTEK_RTL || COMPILE_TEST + depends on COMMON_CLK + select WATCHDOG_CORE + default MACH_REALTEK_RTL + help + Say Y here to include support for the watchdog timer on Realtek + RTL838x, RTL839x, RTL930x SoCs. This watchdog has pretimeout + notifications and system reset on timeout. + + When built as a module this will be called realtek_otto_wdt. + config SPRD_WATCHDOG tristate "Spreadtrum watchdog support" depends on ARCH_SPRD || COMPILE_TEST diff --git a/drivers/watchdog/Makefile b/drivers/watchdog/Makefile index b01007c0396c..92d8d8530ac6 100644 --- a/drivers/watchdog/Makefile +++ b/drivers/watchdog/Makefile @@ -169,6 +169,7 @@ obj-$(CONFIG_IMGPDC_WDT) += imgpdc_wdt.o obj-$(CONFIG_MT7621_WDT) += mt7621_wdt.o obj-$(CONFIG_PIC32_WDT) += pic32-wdt.o obj-$(CONFIG_PIC32_DMT) += pic32-dmt.o +obj-$(CONFIG_REALTEK_OTTO_WDT) += realtek_otto_wdt.o # PARISC Architecture diff --git a/drivers/watchdog/realtek_otto_wdt.c b/drivers/watchdog/realtek_otto_wdt.c new file mode 100644 index 000000000000..60058a0c3ec4 --- /dev/null +++ b/drivers/watchdog/realtek_otto_wdt.c @@ -0,0 +1,384 @@ +// SPDX-License-Identifier: GPL-2.0-only + +/* + * Realtek Otto MIPS platform watchdog + * + * Watchdog timer that will reset the system after timeout, using the selected + * reset mode. + * + * Counter scaling and timeouts: + * - Base prescale of (2 << 25), providing tick duration T_0: 168ms @ 200MHz + * - PRESCALE: logarithmic prescaler adding a factor of {1, 2, 4, 8} + * - Phase 1: Times out after (PHASE1 + 1) × PRESCALE × T_0 + * Generates an interrupt, WDT cannot be stopped after phase 1 + * - Phase 2: starts after phase 1, times out after (PHASE2 + 1) × PRESCALE × T_0 + * Resets the system according to RST_MODE + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define OTTO_WDT_REG_CNTR 0x0 +#define OTTO_WDT_CNTR_PING BIT(31) + +#define OTTO_WDT_REG_INTR 0x4 +#define OTTO_WDT_INTR_PHASE_1 BIT(31) +#define OTTO_WDT_INTR_PHASE_2 BIT(30) + +#define OTTO_WDT_REG_CTRL 0x8 +#define OTTO_WDT_CTRL_ENABLE BIT(31) +#define OTTO_WDT_CTRL_PRESCALE GENMASK(30, 29) +#define OTTO_WDT_CTRL_PHASE1 GENMASK(26, 22) +#define OTTO_WDT_CTRL_PHASE2 GENMASK(19, 15) +#define OTTO_WDT_CTRL_RST_MODE GENMASK(1, 0) +#define OTTO_WDT_MODE_SOC 0 +#define OTTO_WDT_MODE_CPU 1 +#define OTTO_WDT_MODE_SOFTWARE 2 +#define OTTO_WDT_CTRL_DEFAULT OTTO_WDT_MODE_CPU + +#define OTTO_WDT_PRESCALE_MAX 3 + +/* + * One higher than the max values contained in PHASE{1,2}, since a value of 0 + * corresponds to one tick. + */ +#define OTTO_WDT_PHASE_TICKS_MAX 32 + +/* + * The maximum reset delay is actually 2×32 ticks, but that would require large + * pretimeout values for timeouts longer than 32 ticks. Limit the maximum timeout + * to 32 + 1 to ensure small pretimeout values can be configured as expected. + */ +#define OTTO_WDT_TIMEOUT_TICKS_MAX (OTTO_WDT_PHASE_TICKS_MAX + 1) + +struct otto_wdt_ctrl { + struct watchdog_device wdev; + struct device *dev; + void __iomem *base; + unsigned int clk_rate_khz; + int irq_phase1; +}; + +static int otto_wdt_start(struct watchdog_device *wdev) +{ + struct otto_wdt_ctrl *ctrl = watchdog_get_drvdata(wdev); + u32 v; + + v = ioread32(ctrl->base + OTTO_WDT_REG_CTRL); + v |= OTTO_WDT_CTRL_ENABLE; + iowrite32(v, ctrl->base + OTTO_WDT_REG_CTRL); + + return 0; +} + +static int otto_wdt_stop(struct watchdog_device *wdev) +{ + struct otto_wdt_ctrl *ctrl = watchdog_get_drvdata(wdev); + u32 v; + + v = ioread32(ctrl->base + OTTO_WDT_REG_CTRL); + v &= ~OTTO_WDT_CTRL_ENABLE; + iowrite32(v, ctrl->base + OTTO_WDT_REG_CTRL); + + return 0; +} + +static int otto_wdt_ping(struct watchdog_device *wdev) +{ + struct otto_wdt_ctrl *ctrl = watchdog_get_drvdata(wdev); + + iowrite32(OTTO_WDT_CNTR_PING, ctrl->base + OTTO_WDT_REG_CNTR); + + return 0; +} + +static int otto_wdt_tick_ms(struct otto_wdt_ctrl *ctrl, int prescale) +{ + return DIV_ROUND_CLOSEST(1 << (25 + prescale), ctrl->clk_rate_khz); +} + +/* + * The timer asserts the PHASE1/PHASE2 IRQs when the number of ticks exceeds + * the value stored in those fields. This means each phase will run for at least + * one tick, so small values need to be clamped to correctly reflect the timeout. + */ +static inline unsigned int div_round_ticks(unsigned int val, unsigned int tick_duration, + unsigned int min_ticks) +{ + return max(min_ticks, DIV_ROUND_UP(val, tick_duration)); +} + +static int otto_wdt_determine_timeouts(struct watchdog_device *wdev, unsigned int timeout, + unsigned int pretimeout) +{ + struct otto_wdt_ctrl *ctrl = watchdog_get_drvdata(wdev); + unsigned int pretimeout_ms = pretimeout * 1000; + unsigned int timeout_ms = timeout * 1000; + unsigned int prescale_next = 0; + unsigned int phase1_ticks; + unsigned int phase2_ticks; + unsigned int total_ticks; + unsigned int prescale; + unsigned int tick_ms; + u32 v; + + do { + prescale = prescale_next; + if (prescale > OTTO_WDT_PRESCALE_MAX) + return -EINVAL; + + tick_ms = otto_wdt_tick_ms(ctrl, prescale); + total_ticks = div_round_ticks(timeout_ms, tick_ms, 2); + phase1_ticks = div_round_ticks(timeout_ms - pretimeout_ms, tick_ms, 1); + phase2_ticks = total_ticks - phase1_ticks; + + prescale_next++; + } while (phase1_ticks > OTTO_WDT_PHASE_TICKS_MAX + || phase2_ticks > OTTO_WDT_PHASE_TICKS_MAX); + + v = ioread32(ctrl->base + OTTO_WDT_REG_CTRL); + + v &= ~(OTTO_WDT_CTRL_PRESCALE | OTTO_WDT_CTRL_PHASE1 | OTTO_WDT_CTRL_PHASE2); + v |= FIELD_PREP(OTTO_WDT_CTRL_PHASE1, phase1_ticks - 1); + v |= FIELD_PREP(OTTO_WDT_CTRL_PHASE2, phase2_ticks - 1); + v |= FIELD_PREP(OTTO_WDT_CTRL_PRESCALE, prescale); + + iowrite32(v, ctrl->base + OTTO_WDT_REG_CTRL); + + timeout_ms = total_ticks * tick_ms; + ctrl->wdev.timeout = timeout_ms / 1000; + + pretimeout_ms = phase2_ticks * tick_ms; + ctrl->wdev.pretimeout = pretimeout_ms / 1000; + + return 0; +} + +static int otto_wdt_set_timeout(struct watchdog_device *wdev, unsigned int val) +{ + return otto_wdt_determine_timeouts(wdev, val, min(wdev->pretimeout, val - 1)); +} + +static int otto_wdt_set_pretimeout(struct watchdog_device *wdev, unsigned int val) +{ + return otto_wdt_determine_timeouts(wdev, wdev->timeout, val); +} + +static int otto_wdt_restart(struct watchdog_device *wdev, unsigned long reboot_mode, + void *data) +{ + struct otto_wdt_ctrl *ctrl = watchdog_get_drvdata(wdev); + u32 reset_mode; + u32 v; + + disable_irq(ctrl->irq_phase1); + + switch (reboot_mode) { + case REBOOT_SOFT: + reset_mode = OTTO_WDT_MODE_SOFTWARE; + break; + case REBOOT_WARM: + reset_mode = OTTO_WDT_MODE_CPU; + break; + default: + reset_mode = OTTO_WDT_MODE_SOC; + break; + } + + /* Configure for shortest timeout and wait for reset to occur */ + v = FIELD_PREP(OTTO_WDT_CTRL_RST_MODE, reset_mode) | OTTO_WDT_CTRL_ENABLE; + iowrite32(v, ctrl->base + OTTO_WDT_REG_CTRL); + + mdelay(3 * otto_wdt_tick_ms(ctrl, 0)); + + return 0; +} + +static irqreturn_t otto_wdt_phase1_isr(int irq, void *dev_id) +{ + struct otto_wdt_ctrl *ctrl = dev_id; + + iowrite32(OTTO_WDT_INTR_PHASE_1, ctrl->base + OTTO_WDT_REG_INTR); + dev_crit(ctrl->dev, "phase 1 timeout\n"); + watchdog_notify_pretimeout(&ctrl->wdev); + + return IRQ_HANDLED; +} + +static const struct watchdog_ops otto_wdt_ops = { + .owner = THIS_MODULE, + .start = otto_wdt_start, + .stop = otto_wdt_stop, + .ping = otto_wdt_ping, + .set_timeout = otto_wdt_set_timeout, + .set_pretimeout = otto_wdt_set_pretimeout, + .restart = otto_wdt_restart, +}; + +static const struct watchdog_info otto_wdt_info = { + .identity = "Realtek Otto watchdog timer", + .options = WDIOF_KEEPALIVEPING | + WDIOF_MAGICCLOSE | + WDIOF_SETTIMEOUT | + WDIOF_PRETIMEOUT, +}; + +static void otto_wdt_clock_action(void *data) +{ + clk_disable_unprepare(data); +} + +static int otto_wdt_probe_clk(struct otto_wdt_ctrl *ctrl) +{ + struct clk *clk = devm_clk_get(ctrl->dev, NULL); + int ret; + + if (IS_ERR(clk)) + return dev_err_probe(ctrl->dev, PTR_ERR(clk), "Failed to get clock\n"); + + ret = clk_prepare_enable(clk); + if (ret) + return dev_err_probe(ctrl->dev, ret, "Failed to enable clock\n"); + + ret = devm_add_action_or_reset(ctrl->dev, otto_wdt_clock_action, clk); + if (ret) + return ret; + + ctrl->clk_rate_khz = clk_get_rate(clk) / 1000; + if (ctrl->clk_rate_khz == 0) + return dev_err_probe(ctrl->dev, -ENXIO, "Failed to get clock rate\n"); + + return 0; +} + +static int otto_wdt_probe_reset_mode(struct otto_wdt_ctrl *ctrl) +{ + static const char *mode_property = "realtek,reset-mode"; + const struct fwnode_handle *node = ctrl->dev->fwnode; + int mode_count; + u32 mode; + u32 v; + + if (!node) + return -ENXIO; + + mode_count = fwnode_property_string_array_count(node, mode_property); + if (mode_count < 0) + return mode_count; + else if (mode_count == 0) + return 0; + else if (mode_count != 1) + return -EINVAL; + + if (fwnode_property_match_string(node, mode_property, "soc") == 0) + mode = OTTO_WDT_MODE_SOC; + else if (fwnode_property_match_string(node, mode_property, "cpu") == 0) + mode = OTTO_WDT_MODE_CPU; + else if (fwnode_property_match_string(node, mode_property, "software") == 0) + mode = OTTO_WDT_MODE_SOFTWARE; + else + return -EINVAL; + + v = ioread32(ctrl->base + OTTO_WDT_REG_CTRL); + v &= ~OTTO_WDT_CTRL_RST_MODE; + v |= FIELD_PREP(OTTO_WDT_CTRL_RST_MODE, mode); + iowrite32(v, ctrl->base + OTTO_WDT_REG_CTRL); + + return 0; +} + +static int otto_wdt_probe(struct platform_device *pdev) +{ + struct device *dev = &pdev->dev; + struct otto_wdt_ctrl *ctrl; + unsigned int max_tick_ms; + int ret; + + ctrl = devm_kzalloc(dev, sizeof(*ctrl), GFP_KERNEL); + if (!ctrl) + return -ENOMEM; + + ctrl->dev = dev; + ctrl->base = devm_platform_ioremap_resource(pdev, 0); + if (IS_ERR(ctrl->base)) + return PTR_ERR(ctrl->base); + + /* Clear any old interrupts and reset initial state */ + iowrite32(OTTO_WDT_INTR_PHASE_1 | OTTO_WDT_INTR_PHASE_2, + ctrl->base + OTTO_WDT_REG_INTR); + iowrite32(OTTO_WDT_CTRL_DEFAULT, ctrl->base + OTTO_WDT_REG_CTRL); + + ret = otto_wdt_probe_clk(ctrl); + if (ret) + return ret; + + ctrl->irq_phase1 = platform_get_irq_byname(pdev, "phase1"); + if (ctrl->irq_phase1 < 0) + return ctrl->irq_phase1; + + ret = devm_request_irq(dev, ctrl->irq_phase1, otto_wdt_phase1_isr, 0, + "realtek-otto-wdt", ctrl); + if (ret) + return dev_err_probe(dev, ret, "Failed to get IRQ for phase1\n"); + + ret = otto_wdt_probe_reset_mode(ctrl); + if (ret) + return dev_err_probe(dev, ret, "Invalid reset mode specified\n"); + + ctrl->wdev.parent = dev; + ctrl->wdev.info = &otto_wdt_info; + ctrl->wdev.ops = &otto_wdt_ops; + + /* + * Since pretimeout cannot be disabled, min. timeout is twice the + * subsystem resolution. Max. timeout is ca. 43s at a bus clock of 200MHz. + */ + ctrl->wdev.min_timeout = 2; + max_tick_ms = otto_wdt_tick_ms(ctrl, OTTO_WDT_PRESCALE_MAX); + ctrl->wdev.max_hw_heartbeat_ms = max_tick_ms * OTTO_WDT_TIMEOUT_TICKS_MAX; + ctrl->wdev.timeout = min(30U, ctrl->wdev.max_hw_heartbeat_ms / 1000); + + watchdog_set_drvdata(&ctrl->wdev, ctrl); + watchdog_init_timeout(&ctrl->wdev, 0, dev); + watchdog_stop_on_reboot(&ctrl->wdev); + watchdog_set_restart_priority(&ctrl->wdev, 128); + + ret = otto_wdt_determine_timeouts(&ctrl->wdev, ctrl->wdev.timeout, 1); + if (ret) + return dev_err_probe(dev, ret, "Failed to set timeout\n"); + + return devm_watchdog_register_device(dev, &ctrl->wdev); +} + +static const struct of_device_id otto_wdt_ids[] = { + { .compatible = "realtek,rtl8380-wdt" }, + { .compatible = "realtek,rtl8390-wdt" }, + { .compatible = "realtek,rtl9300-wdt" }, + { } +}; +MODULE_DEVICE_TABLE(of, otto_wdt_ids); + +static struct platform_driver otto_wdt_driver = { + .probe = otto_wdt_probe, + .driver = { + .name = "realtek-otto-watchdog", + .of_match_table = otto_wdt_ids, + }, +}; +module_platform_driver(otto_wdt_driver); + +MODULE_LICENSE("GPL v2"); +MODULE_AUTHOR("Sander Vanheule "); +MODULE_DESCRIPTION("Realtek Otto watchdog timer driver"); From 7d608c33cb5843cc9a7f4d86d4cd6a8514cd04a7 Mon Sep 17 00:00:00 2001 From: Andrej Picej Date: Mon, 29 Nov 2021 14:49:38 +0100 Subject: [PATCH 209/493] watchdog: da9063: Add hard dependency on I2C Commit 5ea29919c294 ("watchdog: da9063: use atomic safe i2c transfer in reset handler") implements atomic save i2c transfer which uses i2c functions directly. Add I2C hard dependency which overrides COMPILE_TEST. Reported-by: kernel test robot Fixes: 968011a291f3 ("watchdog: da9063: use atomic safe i2c transfer in reset handler") Signed-off-by: Andrej Picej Reviewed-by: Guenter Roeck Link: https://lore.kernel.org/r/20211129134938.3273289-1-andrej.picej@norik.com Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck --- drivers/watchdog/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/watchdog/Kconfig b/drivers/watchdog/Kconfig index bac8901072e2..d103801e7cc0 100644 --- a/drivers/watchdog/Kconfig +++ b/drivers/watchdog/Kconfig @@ -207,6 +207,7 @@ config DA9055_WATCHDOG config DA9063_WATCHDOG tristate "Dialog DA9063 Watchdog" depends on MFD_DA9063 || COMPILE_TEST + depends on I2C select WATCHDOG_CORE help Support for the watchdog in the DA9063 PMIC. From ab02a00c9e32a5eb1525689b990ad9d345f0832e Mon Sep 17 00:00:00 2001 From: Biju Das Date: Tue, 30 Nov 2021 19:53:56 +0000 Subject: [PATCH 210/493] dt-bindings: watchdog: renesas,wdt: Add support for RZ/G2L Describe the WDT hardware in the RZ/G2L series. Signed-off-by: Biju Das Reviewed-by: Geert Uytterhoeven Reviewed-by: Rob Herring Reviewed-by: Guenter Roeck Link: https://lore.kernel.org/r/20211130195357.18626-2-biju.das.jz@bp.renesas.com Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck --- .../bindings/watchdog/renesas,wdt.yaml | 75 ++++++++++++++----- 1 file changed, 57 insertions(+), 18 deletions(-) diff --git a/Documentation/devicetree/bindings/watchdog/renesas,wdt.yaml b/Documentation/devicetree/bindings/watchdog/renesas,wdt.yaml index ab66d3f0c476..91a98ccd4226 100644 --- a/Documentation/devicetree/bindings/watchdog/renesas,wdt.yaml +++ b/Documentation/devicetree/bindings/watchdog/renesas,wdt.yaml @@ -10,9 +10,6 @@ maintainers: - Wolfram Sang - Geert Uytterhoeven -allOf: - - $ref: "watchdog.yaml#" - properties: compatible: oneOf: @@ -22,6 +19,11 @@ properties: - renesas,r7s9210-wdt # RZ/A2 - const: renesas,rza-wdt # RZ/A + - items: + - enum: + - renesas,r9a07g044-wdt # RZ/G2{L,LC} + - const: renesas,rzg2l-wdt # RZ/G2L + - items: - enum: - renesas,r8a7742-wdt # RZ/G1H @@ -56,11 +58,13 @@ properties: reg: maxItems: 1 - interrupts: - maxItems: 1 + interrupts: true - clocks: - maxItems: 1 + interrupt-names: true + + clocks: true + + clock-names: true power-domains: maxItems: 1 @@ -75,17 +79,52 @@ required: - reg - clocks -if: - not: - properties: - compatible: - contains: - enum: - - renesas,rza-wdt -then: - required: - - power-domains - - resets +allOf: + - $ref: "watchdog.yaml#" + + - if: + not: + properties: + compatible: + contains: + enum: + - renesas,rza-wdt + then: + required: + - power-domains + - resets + + - if: + properties: + compatible: + contains: + enum: + - renesas,rzg2l-wdt + then: + properties: + interrupts: + maxItems: 2 + interrupt-names: + items: + - const: wdt + - const: perrout + clocks: + items: + - description: Register access clock + - description: Main clock + clock-names: + items: + - const: pclk + - const: oscclk + required: + - clock-names + - interrupt-names + else: + properties: + interrupts: + maxItems: 1 + clocks: + maxItems: 1 additionalProperties: false From 2cbc5cd0b55fa2310cc557c77b0665f5e00272de Mon Sep 17 00:00:00 2001 From: Biju Das Date: Tue, 30 Nov 2021 19:53:57 +0000 Subject: [PATCH 211/493] watchdog: Add Watchdog Timer driver for RZ/G2L Add Watchdog Timer driver for RZ/G2L SoC. WDT IP block supports normal watchdog timer function and reset request function due to CPU parity error. This driver currently supports normal watchdog timer function and later will add support for reset request function due to CPU parity error. Signed-off-by: Biju Das Reviewed-by: Guenter Roeck Link: https://lore.kernel.org/r/20211130195357.18626-3-biju.das.jz@bp.renesas.com Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck --- drivers/watchdog/Kconfig | 8 ++ drivers/watchdog/Makefile | 1 + drivers/watchdog/rzg2l_wdt.c | 263 +++++++++++++++++++++++++++++++++++ 3 files changed, 272 insertions(+) create mode 100644 drivers/watchdog/rzg2l_wdt.c diff --git a/drivers/watchdog/Kconfig b/drivers/watchdog/Kconfig index d103801e7cc0..3b30b31fcced 100644 --- a/drivers/watchdog/Kconfig +++ b/drivers/watchdog/Kconfig @@ -883,6 +883,14 @@ config RENESAS_RZAWDT This driver adds watchdog support for the integrated watchdogs in the Renesas RZ/A SoCs. These watchdogs can be used to reset a system. +config RENESAS_RZG2LWDT + tristate "Renesas RZ/G2L WDT Watchdog" + depends on ARCH_RENESAS || COMPILE_TEST + select WATCHDOG_CORE + help + This driver adds watchdog support for the integrated watchdogs in the + Renesas RZ/G2L SoCs. These watchdogs can be used to reset a system. + config ASPEED_WATCHDOG tristate "Aspeed BMC watchdog support" depends on ARCH_ASPEED || COMPILE_TEST diff --git a/drivers/watchdog/Makefile b/drivers/watchdog/Makefile index 92d8d8530ac6..6c991e2cf1db 100644 --- a/drivers/watchdog/Makefile +++ b/drivers/watchdog/Makefile @@ -84,6 +84,7 @@ obj-$(CONFIG_LPC18XX_WATCHDOG) += lpc18xx_wdt.o obj-$(CONFIG_BCM7038_WDT) += bcm7038_wdt.o obj-$(CONFIG_RENESAS_WDT) += renesas_wdt.o obj-$(CONFIG_RENESAS_RZAWDT) += rza_wdt.o +obj-$(CONFIG_RENESAS_RZG2LWDT) += rzg2l_wdt.o obj-$(CONFIG_ASPEED_WATCHDOG) += aspeed_wdt.o obj-$(CONFIG_STM32_WATCHDOG) += stm32_iwdg.o obj-$(CONFIG_UNIPHIER_WATCHDOG) += uniphier_wdt.o diff --git a/drivers/watchdog/rzg2l_wdt.c b/drivers/watchdog/rzg2l_wdt.c new file mode 100644 index 000000000000..6b426df34fd6 --- /dev/null +++ b/drivers/watchdog/rzg2l_wdt.c @@ -0,0 +1,263 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Renesas RZ/G2L WDT Watchdog Driver + * + * Copyright (C) 2021 Renesas Electronics Corporation + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define WDTCNT 0x00 +#define WDTSET 0x04 +#define WDTTIM 0x08 +#define WDTINT 0x0C +#define WDTCNT_WDTEN BIT(0) +#define WDTINT_INTDISP BIT(0) + +#define WDT_DEFAULT_TIMEOUT 60U + +/* Setting period time register only 12 bit set in WDTSET[31:20] */ +#define WDTSET_COUNTER_MASK (0xFFF00000) +#define WDTSET_COUNTER_VAL(f) ((f) << 20) + +#define F2CYCLE_NSEC(f) (1000000000 / (f)) + +static bool nowayout = WATCHDOG_NOWAYOUT; +module_param(nowayout, bool, 0); +MODULE_PARM_DESC(nowayout, "Watchdog cannot be stopped once started (default=" + __MODULE_STRING(WATCHDOG_NOWAYOUT) ")"); + +struct rzg2l_wdt_priv { + void __iomem *base; + struct watchdog_device wdev; + struct reset_control *rstc; + unsigned long osc_clk_rate; + unsigned long delay; +}; + +static void rzg2l_wdt_wait_delay(struct rzg2l_wdt_priv *priv) +{ + /* delay timer when change the setting register */ + ndelay(priv->delay); +} + +static u32 rzg2l_wdt_get_cycle_usec(unsigned long cycle, u32 wdttime) +{ + u64 timer_cycle_us = 1024 * 1024 * (wdttime + 1) * MICRO; + + return div64_ul(timer_cycle_us, cycle); +} + +static void rzg2l_wdt_write(struct rzg2l_wdt_priv *priv, u32 val, unsigned int reg) +{ + if (reg == WDTSET) + val &= WDTSET_COUNTER_MASK; + + writel_relaxed(val, priv->base + reg); + /* Registers other than the WDTINT is always synchronized with WDT_CLK */ + if (reg != WDTINT) + rzg2l_wdt_wait_delay(priv); +} + +static void rzg2l_wdt_init_timeout(struct watchdog_device *wdev) +{ + struct rzg2l_wdt_priv *priv = watchdog_get_drvdata(wdev); + u32 time_out; + + /* Clear Lapsed Time Register and clear Interrupt */ + rzg2l_wdt_write(priv, WDTINT_INTDISP, WDTINT); + /* 2 consecutive overflow cycle needed to trigger reset */ + time_out = (wdev->timeout * (MICRO / 2)) / + rzg2l_wdt_get_cycle_usec(priv->osc_clk_rate, 0); + rzg2l_wdt_write(priv, WDTSET_COUNTER_VAL(time_out), WDTSET); +} + +static int rzg2l_wdt_start(struct watchdog_device *wdev) +{ + struct rzg2l_wdt_priv *priv = watchdog_get_drvdata(wdev); + + reset_control_deassert(priv->rstc); + pm_runtime_get_sync(wdev->parent); + + /* Initialize time out */ + rzg2l_wdt_init_timeout(wdev); + + /* Initialize watchdog counter register */ + rzg2l_wdt_write(priv, 0, WDTTIM); + + /* Enable watchdog timer*/ + rzg2l_wdt_write(priv, WDTCNT_WDTEN, WDTCNT); + + return 0; +} + +static int rzg2l_wdt_stop(struct watchdog_device *wdev) +{ + struct rzg2l_wdt_priv *priv = watchdog_get_drvdata(wdev); + + pm_runtime_put(wdev->parent); + reset_control_assert(priv->rstc); + + return 0; +} + +static int rzg2l_wdt_restart(struct watchdog_device *wdev, + unsigned long action, void *data) +{ + struct rzg2l_wdt_priv *priv = watchdog_get_drvdata(wdev); + + /* Reset the module before we modify any register */ + reset_control_reset(priv->rstc); + pm_runtime_get_sync(wdev->parent); + + /* smallest counter value to reboot soon */ + rzg2l_wdt_write(priv, WDTSET_COUNTER_VAL(1), WDTSET); + + /* Enable watchdog timer*/ + rzg2l_wdt_write(priv, WDTCNT_WDTEN, WDTCNT); + + return 0; +} + +static const struct watchdog_info rzg2l_wdt_ident = { + .options = WDIOF_MAGICCLOSE | WDIOF_KEEPALIVEPING | WDIOF_SETTIMEOUT, + .identity = "Renesas RZ/G2L WDT Watchdog", +}; + +static int rzg2l_wdt_ping(struct watchdog_device *wdev) +{ + struct rzg2l_wdt_priv *priv = watchdog_get_drvdata(wdev); + + rzg2l_wdt_write(priv, WDTINT_INTDISP, WDTINT); + + return 0; +} + +static const struct watchdog_ops rzg2l_wdt_ops = { + .owner = THIS_MODULE, + .start = rzg2l_wdt_start, + .stop = rzg2l_wdt_stop, + .ping = rzg2l_wdt_ping, + .restart = rzg2l_wdt_restart, +}; + +static void rzg2l_wdt_reset_assert_pm_disable_put(void *data) +{ + struct watchdog_device *wdev = data; + struct rzg2l_wdt_priv *priv = watchdog_get_drvdata(wdev); + + pm_runtime_put(wdev->parent); + pm_runtime_disable(wdev->parent); + reset_control_assert(priv->rstc); +} + +static int rzg2l_wdt_probe(struct platform_device *pdev) +{ + struct device *dev = &pdev->dev; + struct rzg2l_wdt_priv *priv; + unsigned long pclk_rate; + struct clk *wdt_clk; + int ret; + + priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL); + if (!priv) + return -ENOMEM; + + priv->base = devm_platform_ioremap_resource(pdev, 0); + if (IS_ERR(priv->base)) + return PTR_ERR(priv->base); + + /* Get watchdog main clock */ + wdt_clk = clk_get(&pdev->dev, "oscclk"); + if (IS_ERR(wdt_clk)) + return dev_err_probe(&pdev->dev, PTR_ERR(wdt_clk), "no oscclk"); + + priv->osc_clk_rate = clk_get_rate(wdt_clk); + clk_put(wdt_clk); + if (!priv->osc_clk_rate) + return dev_err_probe(&pdev->dev, -EINVAL, "oscclk rate is 0"); + + /* Get Peripheral clock */ + wdt_clk = clk_get(&pdev->dev, "pclk"); + if (IS_ERR(wdt_clk)) + return dev_err_probe(&pdev->dev, PTR_ERR(wdt_clk), "no pclk"); + + pclk_rate = clk_get_rate(wdt_clk); + clk_put(wdt_clk); + if (!pclk_rate) + return dev_err_probe(&pdev->dev, -EINVAL, "pclk rate is 0"); + + priv->delay = F2CYCLE_NSEC(priv->osc_clk_rate) * 6 + F2CYCLE_NSEC(pclk_rate) * 9; + + priv->rstc = devm_reset_control_get_exclusive(&pdev->dev, NULL); + if (IS_ERR(priv->rstc)) + return dev_err_probe(&pdev->dev, PTR_ERR(priv->rstc), + "failed to get cpg reset"); + + reset_control_deassert(priv->rstc); + pm_runtime_enable(&pdev->dev); + ret = pm_runtime_resume_and_get(&pdev->dev); + if (ret < 0) { + dev_err(dev, "pm_runtime_resume_and_get failed ret=%pe", ERR_PTR(ret)); + goto out_pm_get; + } + + priv->wdev.info = &rzg2l_wdt_ident; + priv->wdev.ops = &rzg2l_wdt_ops; + priv->wdev.parent = dev; + priv->wdev.min_timeout = 1; + priv->wdev.max_timeout = rzg2l_wdt_get_cycle_usec(priv->osc_clk_rate, 0xfff) / + USEC_PER_SEC; + priv->wdev.timeout = WDT_DEFAULT_TIMEOUT; + + watchdog_set_drvdata(&priv->wdev, priv); + ret = devm_add_action_or_reset(&pdev->dev, + rzg2l_wdt_reset_assert_pm_disable_put, + &priv->wdev); + if (ret < 0) + return ret; + + watchdog_set_nowayout(&priv->wdev, nowayout); + watchdog_stop_on_unregister(&priv->wdev); + + ret = watchdog_init_timeout(&priv->wdev, 0, dev); + if (ret) + dev_warn(dev, "Specified timeout invalid, using default"); + + return devm_watchdog_register_device(&pdev->dev, &priv->wdev); + +out_pm_get: + pm_runtime_disable(dev); + reset_control_assert(priv->rstc); + + return ret; +} + +static const struct of_device_id rzg2l_wdt_ids[] = { + { .compatible = "renesas,rzg2l-wdt", }, + { /* sentinel */ } +}; +MODULE_DEVICE_TABLE(of, rzg2l_wdt_ids); + +static struct platform_driver rzg2l_wdt_driver = { + .driver = { + .name = "rzg2l_wdt", + .of_match_table = rzg2l_wdt_ids, + }, + .probe = rzg2l_wdt_probe, +}; +module_platform_driver(rzg2l_wdt_driver); + +MODULE_DESCRIPTION("Renesas RZ/G2L WDT Watchdog Driver"); +MODULE_AUTHOR("Biju Das "); +MODULE_LICENSE("GPL v2"); From 1bafac47a4f70a169427ffcf59fb673d23e39105 Mon Sep 17 00:00:00 2001 From: Tzung-Bi Shih Date: Wed, 8 Dec 2021 17:55:55 +0800 Subject: [PATCH 212/493] watchdog: mtk_wdt: use platform_get_irq_optional The watchdog pre-timeout (bark) interrupt is optional. Use platform_get_irq_optional() to avoid seeing such following error message: >>> mtk-wdt 10007000.watchdog: IRQ index 0 not found Signed-off-by: Tzung-Bi Shih Reviewed-by: Guenter Roeck Link: https://lore.kernel.org/r/20211208095555.4099551-1-tzungbi@google.com Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck --- drivers/watchdog/mtk_wdt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/watchdog/mtk_wdt.c b/drivers/watchdog/mtk_wdt.c index 543cf38bd04e..4577a76dd464 100644 --- a/drivers/watchdog/mtk_wdt.c +++ b/drivers/watchdog/mtk_wdt.c @@ -339,7 +339,7 @@ static int mtk_wdt_probe(struct platform_device *pdev) if (IS_ERR(mtk_wdt->wdt_base)) return PTR_ERR(mtk_wdt->wdt_base); - irq = platform_get_irq(pdev, 0); + irq = platform_get_irq_optional(pdev, 0); if (irq > 0) { err = devm_request_irq(&pdev->dev, irq, mtk_wdt_isr, 0, "wdt_bark", &mtk_wdt->wdt_dev); From af5bb1c207997c179f1b5b40fced9c39e1d09383 Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Mon, 6 Dec 2021 11:40:45 -0600 Subject: [PATCH 213/493] dt-bindings: watchdog: atmel: Add missing 'interrupts' property With 'unevaluatedProperties' support implemented, the atmel,sama5d4-wdt example has the following warning: /home/rob/proj/git/linux-dt/.build-arm64/Documentation/devicetree/bindings/watchdog/atmel,sama5d4-wdt.example.dt.yaml: watchdog@fc068640: Unevaluated properties are not allowed ('interrupts' was unexpected) Document the missing 'interrupts' property. Cc: Wim Van Sebroeck Cc: Guenter Roeck Cc: Nicolas Ferre Cc: Alexandre Belloni Cc: Ludovic Desroches Cc: Eugen Hristev Cc: linux-watchdog@vger.kernel.org Cc: linux-arm-kernel@lists.infradead.org Signed-off-by: Rob Herring Acked-by: Guenter Roeck Reviewed-by: Thierry Reding Acked-by: Nicolas Ferre Link: https://lore.kernel.org/r/20211206174045.2294873-1-robh@kernel.org Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck --- .../devicetree/bindings/watchdog/atmel,sama5d4-wdt.yaml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/Documentation/devicetree/bindings/watchdog/atmel,sama5d4-wdt.yaml b/Documentation/devicetree/bindings/watchdog/atmel,sama5d4-wdt.yaml index 9856cd76c28d..a9635c03761c 100644 --- a/Documentation/devicetree/bindings/watchdog/atmel,sama5d4-wdt.yaml +++ b/Documentation/devicetree/bindings/watchdog/atmel,sama5d4-wdt.yaml @@ -22,6 +22,9 @@ properties: reg: maxItems: 1 + interrupts: + maxItems: 1 + atmel,watchdog-type: $ref: /schemas/types.yaml#/definitions/string description: should be hardware or software. From a51f589693899a0325a4381098bbb96e06204983 Mon Sep 17 00:00:00 2001 From: Lad Prabhakar Date: Thu, 16 Dec 2021 21:47:47 +0000 Subject: [PATCH 214/493] watchdog: s3c2410: Use platform_get_irq() to get the interrupt platform_get_resource(pdev, IORESOURCE_IRQ, ..) relies on static allocation of IRQ resources in DT core code, this causes an issue when using hierarchical interrupt domains using "interrupts" property in the node as this bypassed the hierarchical setup and messed up the irq chaining. In preparation for removal of static setup of IRQ resource from DT core code use platform_get_irq(). Signed-off-by: Lad Prabhakar Reviewed-by: Guenter Roeck Reviewed-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20211216214747.10454-1-prabhakar.mahadev-lad.rj@bp.renesas.com [groeck: Fixed context conflicts] Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck --- drivers/watchdog/s3c2410_wdt.c | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/drivers/watchdog/s3c2410_wdt.c b/drivers/watchdog/s3c2410_wdt.c index bb374b9fc163..523a6707bb31 100644 --- a/drivers/watchdog/s3c2410_wdt.c +++ b/drivers/watchdog/s3c2410_wdt.c @@ -661,8 +661,8 @@ static int s3c2410wdt_probe(struct platform_device *pdev) { struct device *dev = &pdev->dev; struct s3c2410_wdt *wdt; - struct resource *wdt_irq; unsigned int wtcon; + int wdt_irq; int ret; wdt = devm_kzalloc(dev, sizeof(*wdt), GFP_KERNEL); @@ -686,11 +686,9 @@ static int s3c2410wdt_probe(struct platform_device *pdev) } } - wdt_irq = platform_get_resource(pdev, IORESOURCE_IRQ, 0); - if (wdt_irq == NULL) { - dev_err(dev, "no irq resource specified\n"); - return -ENOENT; - } + wdt_irq = platform_get_irq(pdev, 0); + if (wdt_irq < 0) + return wdt_irq; /* get the memory region for the watchdog timer */ wdt->reg_base = devm_platform_ioremap_resource(pdev, 0); @@ -754,8 +752,8 @@ static int s3c2410wdt_probe(struct platform_device *pdev) } } - ret = devm_request_irq(dev, wdt_irq->start, s3c2410wdt_irq, 0, - pdev->name, pdev); + ret = devm_request_irq(dev, wdt_irq, s3c2410wdt_irq, 0, + pdev->name, pdev); if (ret != 0) { dev_err(dev, "failed to install irq (%d)\n", ret); goto err_cpufreq; From f7bcb02390ad319ecc4161b9c9989f710fa6edb2 Mon Sep 17 00:00:00 2001 From: Sam Protsenko Date: Sun, 12 Dec 2021 19:02:47 +0200 Subject: [PATCH 215/493] watchdog: s3c2410: Fix getting the optional clock "watchdog_src" clock is optional and may not be present for some SoCs supported by this driver. Nevertheless, in case the clock is provided but some error happens during its getting, that error should be handled properly. Use devm_clk_get_optional() API for that. Also report possible errors using dev_err_probe() to handle properly -EPROBE_DEFER error (if clock provider is not ready by the time WDT probe function is executed). Fixes: e249d01b5e8b ("watchdog: s3c2410: Support separate source clock") Reported-by: kernel test robot Reported-by: Dan Carpenter Suggested-by: Guenter Roeck Signed-off-by: Sam Protsenko Reviewed-by: Guenter Roeck Reviewed-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20211212170247.30646-1-semen.protsenko@linaro.org Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck --- drivers/watchdog/s3c2410_wdt.c | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/drivers/watchdog/s3c2410_wdt.c b/drivers/watchdog/s3c2410_wdt.c index 523a6707bb31..6db22f2e3a4f 100644 --- a/drivers/watchdog/s3c2410_wdt.c +++ b/drivers/watchdog/s3c2410_wdt.c @@ -711,16 +711,18 @@ static int s3c2410wdt_probe(struct platform_device *pdev) * "watchdog_src" clock is optional; if it's not present -- just skip it * and use "watchdog" clock as both bus and source clock. */ - wdt->src_clk = devm_clk_get(dev, "watchdog_src"); - if (!IS_ERR(wdt->src_clk)) { - ret = clk_prepare_enable(wdt->src_clk); - if (ret < 0) { - dev_err(dev, "failed to enable source clock\n"); - ret = PTR_ERR(wdt->src_clk); - goto err_bus_clk; - } - } else { - wdt->src_clk = NULL; + wdt->src_clk = devm_clk_get_optional(dev, "watchdog_src"); + if (IS_ERR(wdt->src_clk)) { + dev_err_probe(dev, PTR_ERR(wdt->src_clk), + "failed to get source clock\n"); + ret = PTR_ERR(wdt->src_clk); + goto err_bus_clk; + } + + ret = clk_prepare_enable(wdt->src_clk); + if (ret) { + dev_err(dev, "failed to enable source clock\n"); + goto err_bus_clk; } wdt->wdt_device.min_timeout = 1; From b05e69f822914eb9a327ea325b8289ffc5e4b646 Mon Sep 17 00:00:00 2001 From: Luca Weiss Date: Mon, 13 Dec 2021 09:26:08 +0100 Subject: [PATCH 216/493] dt-bindings: watchdog: Add SM6350 and SM8250 compatible Add devicetree compatible for the watchdog on SM6350 and SM8250 SoC. Signed-off-by: Luca Weiss Acked-by: Konrad Dybcio Acked-by: Rob Herring Link: https://lore.kernel.org/r/20211213082614.22651-8-luca.weiss@fairphone.com Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck --- Documentation/devicetree/bindings/watchdog/qcom-wdt.yaml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Documentation/devicetree/bindings/watchdog/qcom-wdt.yaml b/Documentation/devicetree/bindings/watchdog/qcom-wdt.yaml index ba60bdf1fecc..16c6f82a13ca 100644 --- a/Documentation/devicetree/bindings/watchdog/qcom-wdt.yaml +++ b/Documentation/devicetree/bindings/watchdog/qcom-wdt.yaml @@ -20,7 +20,9 @@ properties: - qcom,apss-wdt-sc7280 - qcom,apss-wdt-sdm845 - qcom,apss-wdt-sdx55 + - qcom,apss-wdt-sm6350 - qcom,apss-wdt-sm8150 + - qcom,apss-wdt-sm8250 - qcom,kpss-timer - qcom,kpss-wdt - qcom,kpss-wdt-apq8064 From 4ed224aeaf661b63c2229df24a4d11a07e2653df Mon Sep 17 00:00:00 2001 From: Sven Peter Date: Sat, 11 Dec 2021 13:36:33 +0100 Subject: [PATCH 217/493] watchdog: Add Apple SoC watchdog driver Add support for the watchdog timer found in Apple SoCs. This driver is also required to reboot these machines. Signed-off-by: Sven Peter Tested-by: Janne Grunau Reviewed-by: Hector Martin Reviewed-by: Guenter Roeck Link: https://lore.kernel.org/r/20211211123633.4392-2-sven@svenpeter.dev Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck --- drivers/watchdog/Kconfig | 12 ++ drivers/watchdog/Makefile | 1 + drivers/watchdog/apple_wdt.c | 226 +++++++++++++++++++++++++++++++++++ 3 files changed, 239 insertions(+) create mode 100644 drivers/watchdog/apple_wdt.c diff --git a/drivers/watchdog/Kconfig b/drivers/watchdog/Kconfig index 3b30b31fcced..dd9ff7c581ac 100644 --- a/drivers/watchdog/Kconfig +++ b/drivers/watchdog/Kconfig @@ -999,6 +999,18 @@ config MSC313E_WATCHDOG To compile this driver as a module, choose M here: the module will be called msc313e_wdt. +config APPLE_WATCHDOG + tristate "Apple SoC watchdog" + depends on ARCH_APPLE || COMPILE_TEST + select WATCHDOG_CORE + help + Say Y here to include support for the Watchdog found in Apple + SoCs such as the M1. Next to the common watchdog features this + driver is also required in order to reboot these SoCs. + + To compile this driver as a module, choose M here: the + module will be called apple_wdt. + # X86 (i386 + ia64 + x86_64) Architecture config ACQUIRE_WDT diff --git a/drivers/watchdog/Makefile b/drivers/watchdog/Makefile index 6c991e2cf1db..2d2948814947 100644 --- a/drivers/watchdog/Makefile +++ b/drivers/watchdog/Makefile @@ -94,6 +94,7 @@ obj-$(CONFIG_PM8916_WATCHDOG) += pm8916_wdt.o obj-$(CONFIG_ARM_SMC_WATCHDOG) += arm_smc_wdt.o obj-$(CONFIG_VISCONTI_WATCHDOG) += visconti_wdt.o obj-$(CONFIG_MSC313E_WATCHDOG) += msc313e_wdt.o +obj-$(CONFIG_APPLE_WATCHDOG) += apple_wdt.o # X86 (i386 + ia64 + x86_64) Architecture obj-$(CONFIG_ACQUIRE_WDT) += acquirewdt.o diff --git a/drivers/watchdog/apple_wdt.c b/drivers/watchdog/apple_wdt.c new file mode 100644 index 000000000000..16aca21f13d6 --- /dev/null +++ b/drivers/watchdog/apple_wdt.c @@ -0,0 +1,226 @@ +// SPDX-License-Identifier: GPL-2.0-only OR MIT +/* + * Apple SoC Watchdog driver + * + * Copyright (C) The Asahi Linux Contributors + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* + * Apple Watchdog MMIO registers + * + * This HW block has three separate watchdogs. WD0 resets the machine + * to recovery mode and is not very useful for us. WD1 and WD2 trigger a normal + * machine reset. WD0 additionally supports a configurable interrupt. + * This information can be used to implement pretimeout support at a later time. + * + * APPLE_WDT_WDx_CUR_TIME is a simple counter incremented for each tick of the + * reference clock. It can also be overwritten to any value. + * Whenever APPLE_WDT_CTRL_RESET_EN is set in APPLE_WDT_WDx_CTRL and + * APPLE_WDT_WDx_CUR_TIME >= APPLE_WDT_WDx_BITE_TIME the entire machine is + * reset. + * Whenever APPLE_WDT_CTRL_IRQ_EN is set and APPLE_WDTx_WD1_CUR_TIME >= + * APPLE_WDTx_WD1_BARK_TIME an interrupt is triggered and + * APPLE_WDT_CTRL_IRQ_STATUS is set. The interrupt can be cleared by writing + * 1 to APPLE_WDT_CTRL_IRQ_STATUS. + */ +#define APPLE_WDT_WD0_CUR_TIME 0x00 +#define APPLE_WDT_WD0_BITE_TIME 0x04 +#define APPLE_WDT_WD0_BARK_TIME 0x08 +#define APPLE_WDT_WD0_CTRL 0x0c + +#define APPLE_WDT_WD1_CUR_TIME 0x10 +#define APPLE_WDT_WD1_BITE_TIME 0x14 +#define APPLE_WDT_WD1_CTRL 0x1c + +#define APPLE_WDT_WD2_CUR_TIME 0x20 +#define APPLE_WDT_WD2_BITE_TIME 0x24 +#define APPLE_WDT_WD2_CTRL 0x2c + +#define APPLE_WDT_CTRL_IRQ_EN BIT(0) +#define APPLE_WDT_CTRL_IRQ_STATUS BIT(1) +#define APPLE_WDT_CTRL_RESET_EN BIT(2) + +#define APPLE_WDT_TIMEOUT_DEFAULT 30 + +struct apple_wdt { + struct watchdog_device wdd; + void __iomem *regs; + unsigned long clk_rate; +}; + +static struct apple_wdt *to_apple_wdt(struct watchdog_device *wdd) +{ + return container_of(wdd, struct apple_wdt, wdd); +} + +static int apple_wdt_start(struct watchdog_device *wdd) +{ + struct apple_wdt *wdt = to_apple_wdt(wdd); + + writel_relaxed(0, wdt->regs + APPLE_WDT_WD1_CUR_TIME); + writel_relaxed(APPLE_WDT_CTRL_RESET_EN, wdt->regs + APPLE_WDT_WD1_CTRL); + + return 0; +} + +static int apple_wdt_stop(struct watchdog_device *wdd) +{ + struct apple_wdt *wdt = to_apple_wdt(wdd); + + writel_relaxed(0, wdt->regs + APPLE_WDT_WD1_CTRL); + + return 0; +} + +static int apple_wdt_ping(struct watchdog_device *wdd) +{ + struct apple_wdt *wdt = to_apple_wdt(wdd); + + writel_relaxed(0, wdt->regs + APPLE_WDT_WD1_CUR_TIME); + + return 0; +} + +static int apple_wdt_set_timeout(struct watchdog_device *wdd, unsigned int s) +{ + struct apple_wdt *wdt = to_apple_wdt(wdd); + + writel_relaxed(0, wdt->regs + APPLE_WDT_WD1_CUR_TIME); + writel_relaxed(wdt->clk_rate * s, wdt->regs + APPLE_WDT_WD1_BITE_TIME); + + wdd->timeout = s; + + return 0; +} + +static unsigned int apple_wdt_get_timeleft(struct watchdog_device *wdd) +{ + struct apple_wdt *wdt = to_apple_wdt(wdd); + u32 cur_time, reset_time; + + cur_time = readl_relaxed(wdt->regs + APPLE_WDT_WD1_CUR_TIME); + reset_time = readl_relaxed(wdt->regs + APPLE_WDT_WD1_BITE_TIME); + + return (reset_time - cur_time) / wdt->clk_rate; +} + +static int apple_wdt_restart(struct watchdog_device *wdd, unsigned long mode, + void *cmd) +{ + struct apple_wdt *wdt = to_apple_wdt(wdd); + + writel_relaxed(APPLE_WDT_CTRL_RESET_EN, wdt->regs + APPLE_WDT_WD1_CTRL); + writel_relaxed(0, wdt->regs + APPLE_WDT_WD1_BITE_TIME); + writel_relaxed(0, wdt->regs + APPLE_WDT_WD1_CUR_TIME); + + /* + * Flush writes and then wait for the SoC to reset. Even though the + * reset is queued almost immediately experiments have shown that it + * can take up to ~20-25ms until the SoC is actually reset. Just wait + * 50ms here to be safe. + */ + (void)readl_relaxed(wdt->regs + APPLE_WDT_WD1_CUR_TIME); + mdelay(50); + + return 0; +} + +static void apple_wdt_clk_disable_unprepare(void *data) +{ + clk_disable_unprepare(data); +} + +static struct watchdog_ops apple_wdt_ops = { + .owner = THIS_MODULE, + .start = apple_wdt_start, + .stop = apple_wdt_stop, + .ping = apple_wdt_ping, + .set_timeout = apple_wdt_set_timeout, + .get_timeleft = apple_wdt_get_timeleft, + .restart = apple_wdt_restart, +}; + +static struct watchdog_info apple_wdt_info = { + .identity = "Apple SoC Watchdog", + .options = WDIOF_MAGICCLOSE | WDIOF_KEEPALIVEPING | WDIOF_SETTIMEOUT, +}; + +static int apple_wdt_probe(struct platform_device *pdev) +{ + struct device *dev = &pdev->dev; + struct apple_wdt *wdt; + struct clk *clk; + u32 wdt_ctrl; + int ret; + + wdt = devm_kzalloc(dev, sizeof(*wdt), GFP_KERNEL); + if (!wdt) + return -ENOMEM; + + wdt->regs = devm_platform_ioremap_resource(pdev, 0); + if (IS_ERR(wdt->regs)) + return PTR_ERR(wdt->regs); + + clk = devm_clk_get(dev, NULL); + if (IS_ERR(clk)) + return PTR_ERR(clk); + + ret = clk_prepare_enable(clk); + if (ret) + return ret; + + ret = devm_add_action_or_reset(dev, apple_wdt_clk_disable_unprepare, + clk); + if (ret) + return ret; + + wdt->clk_rate = clk_get_rate(clk); + if (!wdt->clk_rate) + return -EINVAL; + + wdt->wdd.ops = &apple_wdt_ops; + wdt->wdd.info = &apple_wdt_info; + wdt->wdd.max_timeout = U32_MAX / wdt->clk_rate; + wdt->wdd.timeout = APPLE_WDT_TIMEOUT_DEFAULT; + + wdt_ctrl = readl_relaxed(wdt->regs + APPLE_WDT_WD1_CTRL); + if (wdt_ctrl & APPLE_WDT_CTRL_RESET_EN) + set_bit(WDOG_HW_RUNNING, &wdt->wdd.status); + + watchdog_init_timeout(&wdt->wdd, 0, dev); + apple_wdt_set_timeout(&wdt->wdd, wdt->wdd.timeout); + watchdog_stop_on_unregister(&wdt->wdd); + watchdog_set_restart_priority(&wdt->wdd, 128); + + return devm_watchdog_register_device(dev, &wdt->wdd); +} + +static const struct of_device_id apple_wdt_of_match[] = { + { .compatible = "apple,wdt" }, + {}, +}; +MODULE_DEVICE_TABLE(of, apple_wdt_of_match); + +static struct platform_driver apple_wdt_driver = { + .driver = { + .name = "apple-watchdog", + .of_match_table = apple_wdt_of_match, + }, + .probe = apple_wdt_probe, +}; +module_platform_driver(apple_wdt_driver); + +MODULE_DESCRIPTION("Apple SoC watchdog driver"); +MODULE_AUTHOR("Sven Peter "); +MODULE_LICENSE("Dual MIT/GPL"); From ffd264bd152cbf88fcf5ced04d3d380c77020231 Mon Sep 17 00:00:00 2001 From: Daniel Palmer Date: Tue, 28 Dec 2021 16:34:27 +0900 Subject: [PATCH 218/493] watchdog: msc313e: Check if the WDT was running at boot Check if the WDT was running at boot and set the running flag if it was. This prevents the system from getting rebooted if the userland daemon doesn't take over soon enough or there isn't a userland daemon at all. Signed-off-by: Daniel Palmer Reviewed-by: Romain Perier Reviewed-by: Guenter Roeck Link: https://lore.kernel.org/r/20211228073427.2443174-1-daniel@0x0f.com Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck --- drivers/watchdog/msc313e_wdt.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/watchdog/msc313e_wdt.c b/drivers/watchdog/msc313e_wdt.c index 0d497aa0fb7d..90171431fc59 100644 --- a/drivers/watchdog/msc313e_wdt.c +++ b/drivers/watchdog/msc313e_wdt.c @@ -120,6 +120,10 @@ static int msc313e_wdt_probe(struct platform_device *pdev) priv->wdev.max_timeout = U32_MAX / clk_get_rate(priv->clk); priv->wdev.timeout = MSC313E_WDT_DEFAULT_TIMEOUT; + /* If the period is non-zero the WDT is running */ + if (readw(priv->base + REG_WDT_MAX_PRD_L) | (readw(priv->base + REG_WDT_MAX_PRD_H) << 16)) + set_bit(WDOG_HW_RUNNING, &priv->wdev.status); + watchdog_set_drvdata(&priv->wdev, priv); watchdog_init_timeout(&priv->wdev, timeout, dev); From bccfb96b59179d4f96cbbd1ddff8fac6d335eae4 Mon Sep 17 00:00:00 2001 From: Tudor Ambarus Date: Wed, 15 Dec 2021 13:01:04 +0200 Subject: [PATCH 219/493] dmaengine: at_xdmac: Don't start transactions at tx_submit level tx_submit is supposed to push the current transaction descriptor to a pending queue, waiting for issue_pending() to be called. issue_pending() must start the transfer, not tx_submit(), thus remove at_xdmac_start_xfer() from at_xdmac_tx_submit(). Clients of at_xdmac that assume that tx_submit() starts the transfer must be updated and call dma_async_issue_pending() if they miss to call it (one example is atmel_serial). As the at_xdmac_start_xfer() is now called only from at_xdmac_advance_work() when !at_xdmac_chan_is_enabled(), the at_xdmac_chan_is_enabled() check is no longer needed in at_xdmac_start_xfer(), thus remove it. Fixes: e1f7c9eee707 ("dmaengine: at_xdmac: creation of the atmel eXtended DMA Controller driver") Signed-off-by: Tudor Ambarus Link: https://lore.kernel.org/r/20211215110115.191749-2-tudor.ambarus@microchip.com Signed-off-by: Vinod Koul --- drivers/dma/at_xdmac.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/drivers/dma/at_xdmac.c b/drivers/dma/at_xdmac.c index e42dede5b243..4ff12b083136 100644 --- a/drivers/dma/at_xdmac.c +++ b/drivers/dma/at_xdmac.c @@ -385,9 +385,6 @@ static void at_xdmac_start_xfer(struct at_xdmac_chan *atchan, dev_vdbg(chan2dev(&atchan->chan), "%s: desc 0x%p\n", __func__, first); - if (at_xdmac_chan_is_enabled(atchan)) - return; - /* Set transfer as active to not try to start it again. */ first->active_xfer = true; @@ -479,9 +476,6 @@ static dma_cookie_t at_xdmac_tx_submit(struct dma_async_tx_descriptor *tx) dev_vdbg(chan2dev(tx->chan), "%s: atchan 0x%p, add desc 0x%p to xfers_list\n", __func__, atchan, desc); list_add_tail(&desc->xfer_node, &atchan->xfers_list); - if (list_is_singular(&atchan->xfers_list)) - at_xdmac_start_xfer(atchan, desc); - spin_unlock_irqrestore(&atchan->lock, irqflags); return cookie; } From e6af9b05bec63cd4d1de2a33968cd0be2a91282a Mon Sep 17 00:00:00 2001 From: Tudor Ambarus Date: Wed, 15 Dec 2021 13:01:05 +0200 Subject: [PATCH 220/493] dmaengine: at_xdmac: Start transfer for cyclic channels in issue_pending Cyclic channels must too call issue_pending in order to start a transfer. Start the transfer in issue_pending regardless of the type of channel. This wrongly worked before, because in the past the transfer was started at tx_submit level when only a desc in the transfer list. Fixes: e1f7c9eee707 ("dmaengine: at_xdmac: creation of the atmel eXtended DMA Controller driver") Signed-off-by: Tudor Ambarus Link: https://lore.kernel.org/r/20211215110115.191749-3-tudor.ambarus@microchip.com Signed-off-by: Vinod Koul --- drivers/dma/at_xdmac.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/drivers/dma/at_xdmac.c b/drivers/dma/at_xdmac.c index 4ff12b083136..c3d3e1270236 100644 --- a/drivers/dma/at_xdmac.c +++ b/drivers/dma/at_xdmac.c @@ -1778,11 +1778,9 @@ static void at_xdmac_issue_pending(struct dma_chan *chan) dev_dbg(chan2dev(&atchan->chan), "%s\n", __func__); - if (!at_xdmac_chan_is_cyclic(atchan)) { - spin_lock_irqsave(&atchan->lock, flags); - at_xdmac_advance_work(atchan); - spin_unlock_irqrestore(&atchan->lock, flags); - } + spin_lock_irqsave(&atchan->lock, flags); + at_xdmac_advance_work(atchan); + spin_unlock_irqrestore(&atchan->lock, flags); return; } From 5edc24ac876a928f36f407a0fcdb33b94a3a210f Mon Sep 17 00:00:00 2001 From: Tudor Ambarus Date: Wed, 15 Dec 2021 13:01:06 +0200 Subject: [PATCH 221/493] dmaengine: at_xdmac: Print debug message after realeasing the lock It is desirable to do the prints without the lock held if possible, so move the print after the lock is released. Fixes: e1f7c9eee707 ("dmaengine: at_xdmac: creation of the atmel eXtended DMA Controller driver") Signed-off-by: Tudor Ambarus Link: https://lore.kernel.org/r/20211215110115.191749-4-tudor.ambarus@microchip.com Signed-off-by: Vinod Koul --- drivers/dma/at_xdmac.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/dma/at_xdmac.c b/drivers/dma/at_xdmac.c index c3d3e1270236..7d3560acedbb 100644 --- a/drivers/dma/at_xdmac.c +++ b/drivers/dma/at_xdmac.c @@ -473,10 +473,12 @@ static dma_cookie_t at_xdmac_tx_submit(struct dma_async_tx_descriptor *tx) spin_lock_irqsave(&atchan->lock, irqflags); cookie = dma_cookie_assign(tx); - dev_vdbg(chan2dev(tx->chan), "%s: atchan 0x%p, add desc 0x%p to xfers_list\n", - __func__, atchan, desc); list_add_tail(&desc->xfer_node, &atchan->xfers_list); spin_unlock_irqrestore(&atchan->lock, irqflags); + + dev_vdbg(chan2dev(tx->chan), "%s: atchan 0x%p, add desc 0x%p to xfers_list\n", + __func__, atchan, desc); + return cookie; } From 506875c30fc5bf92246060bc3b4c38799646266b Mon Sep 17 00:00:00 2001 From: Tudor Ambarus Date: Wed, 15 Dec 2021 13:01:07 +0200 Subject: [PATCH 222/493] dmaengine: at_xdmac: Fix concurrency over chan's completed_cookie Caller of dma_cookie_complete is expected to hold a lock to prevent concurrency over the channel's completed cookie marker. Call dma_cookie_complete() with the lock held. Fixes: e1f7c9eee707 ("dmaengine: at_xdmac: creation of the atmel eXtended DMA Controller driver") Signed-off-by: Tudor Ambarus Link: https://lore.kernel.org/r/20211215110115.191749-5-tudor.ambarus@microchip.com Signed-off-by: Vinod Koul --- drivers/dma/at_xdmac.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/dma/at_xdmac.c b/drivers/dma/at_xdmac.c index 7d3560acedbb..83c031207530 100644 --- a/drivers/dma/at_xdmac.c +++ b/drivers/dma/at_xdmac.c @@ -1703,11 +1703,10 @@ static void at_xdmac_tasklet(struct tasklet_struct *t) } txd = &desc->tx_dma_desc; - + dma_cookie_complete(txd); at_xdmac_remove_xfer(atchan, desc); spin_unlock_irq(&atchan->lock); - dma_cookie_complete(txd); if (txd->flags & DMA_PREP_INTERRUPT) dmaengine_desc_get_callback_invoke(txd, NULL); From b63e5cb94ad6947ab5fe38b5a9417dcfd0bc6122 Mon Sep 17 00:00:00 2001 From: Tudor Ambarus Date: Wed, 15 Dec 2021 13:01:08 +0200 Subject: [PATCH 223/493] dmaengine: at_xdmac: Fix race for the tx desc callback The transfer descriptors were wrongly moved to the free descriptors list before calling the tx desc callback. As the DMA engine drivers drop any locks before calling the callback function, txd could be taken again, resulting in its callback called prematurely. Fix the race for the tx desc callback by moving the xfer desc into the free desc list after the callback is invoked. Fixes: e1f7c9eee707 ("dmaengine: at_xdmac: creation of the atmel eXtended DMA Controller driver") Signed-off-by: Tudor Ambarus Link: https://lore.kernel.org/r/20211215110115.191749-6-tudor.ambarus@microchip.com Signed-off-by: Vinod Koul --- drivers/dma/at_xdmac.c | 25 ++++++++----------------- 1 file changed, 8 insertions(+), 17 deletions(-) diff --git a/drivers/dma/at_xdmac.c b/drivers/dma/at_xdmac.c index 83c031207530..d5b37459f906 100644 --- a/drivers/dma/at_xdmac.c +++ b/drivers/dma/at_xdmac.c @@ -1582,20 +1582,6 @@ spin_unlock: return ret; } -/* Call must be protected by lock. */ -static void at_xdmac_remove_xfer(struct at_xdmac_chan *atchan, - struct at_xdmac_desc *desc) -{ - dev_dbg(chan2dev(&atchan->chan), "%s: desc 0x%p\n", __func__, desc); - - /* - * Remove the transfer from the transfer list then move the transfer - * descriptors into the free descriptors list. - */ - list_del(&desc->xfer_node); - list_splice_init(&desc->descs_list, &atchan->free_descs_list); -} - static void at_xdmac_advance_work(struct at_xdmac_chan *atchan) { struct at_xdmac_desc *desc; @@ -1704,7 +1690,8 @@ static void at_xdmac_tasklet(struct tasklet_struct *t) txd = &desc->tx_dma_desc; dma_cookie_complete(txd); - at_xdmac_remove_xfer(atchan, desc); + /* Remove the transfer from the transfer list. */ + list_del(&desc->xfer_node); spin_unlock_irq(&atchan->lock); if (txd->flags & DMA_PREP_INTERRUPT) @@ -1713,6 +1700,8 @@ static void at_xdmac_tasklet(struct tasklet_struct *t) dma_run_dependencies(txd); spin_lock_irq(&atchan->lock); + /* Move the xfer descriptors into the free descriptors list. */ + list_splice_init(&desc->descs_list, &atchan->free_descs_list); at_xdmac_advance_work(atchan); spin_unlock_irq(&atchan->lock); } @@ -1859,8 +1848,10 @@ static int at_xdmac_device_terminate_all(struct dma_chan *chan) cpu_relax(); /* Cancel all pending transfers. */ - list_for_each_entry_safe(desc, _desc, &atchan->xfers_list, xfer_node) - at_xdmac_remove_xfer(atchan, desc); + list_for_each_entry_safe(desc, _desc, &atchan->xfers_list, xfer_node) { + list_del(&desc->xfer_node); + list_splice_init(&desc->descs_list, &atchan->free_descs_list); + } clear_bit(AT_XDMAC_CHAN_IS_PAUSED, &atchan->status); clear_bit(AT_XDMAC_CHAN_IS_CYCLIC, &atchan->status); From 801db90bf294f647b967e8d99b9ae121bea63d0d Mon Sep 17 00:00:00 2001 From: Tudor Ambarus Date: Wed, 15 Dec 2021 13:01:09 +0200 Subject: [PATCH 224/493] dmaengine: at_xdmac: Move the free desc to the tail of the desc list Move the free desc to the tail of the list, so that the sequence of descriptors is more track-able in case of debug. One would know which descriptor should come next and could easier catch concurrency over descriptors for example. virt-dma uses list_splice_tail_init() as well, follow the core driver. Signed-off-by: Tudor Ambarus Link: https://lore.kernel.org/r/20211215110115.191749-7-tudor.ambarus@microchip.com Signed-off-by: Vinod Koul --- drivers/dma/at_xdmac.c | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/drivers/dma/at_xdmac.c b/drivers/dma/at_xdmac.c index d5b37459f906..b6547f1b5645 100644 --- a/drivers/dma/at_xdmac.c +++ b/drivers/dma/at_xdmac.c @@ -729,7 +729,8 @@ at_xdmac_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl, if (!desc) { dev_err(chan2dev(chan), "can't get descriptor\n"); if (first) - list_splice_init(&first->descs_list, &atchan->free_descs_list); + list_splice_tail_init(&first->descs_list, + &atchan->free_descs_list); goto spin_unlock; } @@ -817,7 +818,8 @@ at_xdmac_prep_dma_cyclic(struct dma_chan *chan, dma_addr_t buf_addr, if (!desc) { dev_err(chan2dev(chan), "can't get descriptor\n"); if (first) - list_splice_init(&first->descs_list, &atchan->free_descs_list); + list_splice_tail_init(&first->descs_list, + &atchan->free_descs_list); spin_unlock_irqrestore(&atchan->lock, irqflags); return NULL; } @@ -1051,8 +1053,8 @@ at_xdmac_prep_interleaved(struct dma_chan *chan, src_addr, dst_addr, xt, chunk); if (!desc) { - list_splice_init(&first->descs_list, - &atchan->free_descs_list); + list_splice_tail_init(&first->descs_list, + &atchan->free_descs_list); return NULL; } @@ -1132,7 +1134,8 @@ at_xdmac_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dest, dma_addr_t src, if (!desc) { dev_err(chan2dev(chan), "can't get descriptor\n"); if (first) - list_splice_init(&first->descs_list, &atchan->free_descs_list); + list_splice_tail_init(&first->descs_list, + &atchan->free_descs_list); return NULL; } @@ -1308,8 +1311,8 @@ at_xdmac_prep_dma_memset_sg(struct dma_chan *chan, struct scatterlist *sgl, sg_dma_len(sg), value); if (!desc && first) - list_splice_init(&first->descs_list, - &atchan->free_descs_list); + list_splice_tail_init(&first->descs_list, + &atchan->free_descs_list); if (!first) first = desc; @@ -1701,7 +1704,8 @@ static void at_xdmac_tasklet(struct tasklet_struct *t) spin_lock_irq(&atchan->lock); /* Move the xfer descriptors into the free descriptors list. */ - list_splice_init(&desc->descs_list, &atchan->free_descs_list); + list_splice_tail_init(&desc->descs_list, + &atchan->free_descs_list); at_xdmac_advance_work(atchan); spin_unlock_irq(&atchan->lock); } @@ -1850,7 +1854,8 @@ static int at_xdmac_device_terminate_all(struct dma_chan *chan) /* Cancel all pending transfers. */ list_for_each_entry_safe(desc, _desc, &atchan->xfers_list, xfer_node) { list_del(&desc->xfer_node); - list_splice_init(&desc->descs_list, &atchan->free_descs_list); + list_splice_tail_init(&desc->descs_list, + &atchan->free_descs_list); } clear_bit(AT_XDMAC_CHAN_IS_PAUSED, &atchan->status); From 18deddea9184b62941395889ff7659529c877326 Mon Sep 17 00:00:00 2001 From: Tudor Ambarus Date: Wed, 15 Dec 2021 13:01:10 +0200 Subject: [PATCH 225/493] dmaengine: at_xdmac: Fix concurrency over xfers_list Since tx_submit can be called from a hard IRQ, xfers_list must be protected with a lock to avoid concurency on the list's elements. Since at_xdmac_handle_cyclic() is called from a tasklet, spin_lock_irq is enough to protect from a hard IRQ. Fixes: e1f7c9eee707 ("dmaengine: at_xdmac: creation of the atmel eXtended DMA Controller driver") Signed-off-by: Tudor Ambarus Link: https://lore.kernel.org/r/20211215110115.191749-8-tudor.ambarus@microchip.com Signed-off-by: Vinod Koul --- drivers/dma/at_xdmac.c | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/drivers/dma/at_xdmac.c b/drivers/dma/at_xdmac.c index b6547f1b5645..eeb03065d484 100644 --- a/drivers/dma/at_xdmac.c +++ b/drivers/dma/at_xdmac.c @@ -1608,14 +1608,17 @@ static void at_xdmac_handle_cyclic(struct at_xdmac_chan *atchan) struct at_xdmac_desc *desc; struct dma_async_tx_descriptor *txd; - if (!list_empty(&atchan->xfers_list)) { - desc = list_first_entry(&atchan->xfers_list, - struct at_xdmac_desc, xfer_node); - txd = &desc->tx_dma_desc; - - if (txd->flags & DMA_PREP_INTERRUPT) - dmaengine_desc_get_callback_invoke(txd, NULL); + spin_lock_irq(&atchan->lock); + if (list_empty(&atchan->xfers_list)) { + spin_unlock_irq(&atchan->lock); + return; } + desc = list_first_entry(&atchan->xfers_list, struct at_xdmac_desc, + xfer_node); + spin_unlock_irq(&atchan->lock); + txd = &desc->tx_dma_desc; + if (txd->flags & DMA_PREP_INTERRUPT) + dmaengine_desc_get_callback_invoke(txd, NULL); } static void at_xdmac_handle_error(struct at_xdmac_chan *atchan) From 42468aa8b1aa4d7d4d9b56b1d2959d34b7e27575 Mon Sep 17 00:00:00 2001 From: Tudor Ambarus Date: Wed, 15 Dec 2021 13:01:11 +0200 Subject: [PATCH 226/493] dmaengine: at_xdmac: Remove a level of indentation in at_xdmac_advance_work() It's easier to read code with fewer levels of indentation, remove a level of indentation in at_xdmac_advance_work() if (!foo() & !bar()) { } was replaced by: if (foo() || bar()) return; Signed-off-by: Tudor Ambarus Link: https://lore.kernel.org/r/20211215110115.191749-9-tudor.ambarus@microchip.com Signed-off-by: Vinod Koul --- drivers/dma/at_xdmac.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/dma/at_xdmac.c b/drivers/dma/at_xdmac.c index eeb03065d484..0b09ec752db4 100644 --- a/drivers/dma/at_xdmac.c +++ b/drivers/dma/at_xdmac.c @@ -1593,14 +1593,14 @@ static void at_xdmac_advance_work(struct at_xdmac_chan *atchan) * If channel is enabled, do nothing, advance_work will be triggered * after the interruption. */ - if (!at_xdmac_chan_is_enabled(atchan) && !list_empty(&atchan->xfers_list)) { - desc = list_first_entry(&atchan->xfers_list, - struct at_xdmac_desc, - xfer_node); - dev_vdbg(chan2dev(&atchan->chan), "%s: desc 0x%p\n", __func__, desc); - if (!desc->active_xfer) - at_xdmac_start_xfer(atchan, desc); - } + if (at_xdmac_chan_is_enabled(atchan) || list_empty(&atchan->xfers_list)) + return; + + desc = list_first_entry(&atchan->xfers_list, struct at_xdmac_desc, + xfer_node); + dev_vdbg(chan2dev(&atchan->chan), "%s: desc 0x%p\n", __func__, desc); + if (!desc->active_xfer) + at_xdmac_start_xfer(atchan, desc); } static void at_xdmac_handle_cyclic(struct at_xdmac_chan *atchan) From 1385eb4d14d447cc5d744bc2ac34f43be66c9963 Mon Sep 17 00:00:00 2001 From: Tudor Ambarus Date: Wed, 15 Dec 2021 13:01:12 +0200 Subject: [PATCH 227/493] dmaengine: at_xdmac: Fix lld view setting AT_XDMAC_CNDC_NDVIEW_NDV3 was set even for AT_XDMAC_MBR_UBC_NDV2, because of the wrong bit handling. Fix it. Fixes: ee0fe35c8dcd ("dmaengine: xdmac: Handle descriptor's view 3 registers") Signed-off-by: Tudor Ambarus Link: https://lore.kernel.org/r/20211215110115.191749-10-tudor.ambarus@microchip.com Signed-off-by: Vinod Koul --- drivers/dma/at_xdmac.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/dma/at_xdmac.c b/drivers/dma/at_xdmac.c index 0b09ec752db4..6e5bfc9b3825 100644 --- a/drivers/dma/at_xdmac.c +++ b/drivers/dma/at_xdmac.c @@ -99,6 +99,7 @@ #define AT_XDMAC_CNDC_NDE (0x1 << 0) /* Channel x Next Descriptor Enable */ #define AT_XDMAC_CNDC_NDSUP (0x1 << 1) /* Channel x Next Descriptor Source Update */ #define AT_XDMAC_CNDC_NDDUP (0x1 << 2) /* Channel x Next Descriptor Destination Update */ +#define AT_XDMAC_CNDC_NDVIEW_MASK GENMASK(28, 27) #define AT_XDMAC_CNDC_NDVIEW_NDV0 (0x0 << 3) /* Channel x Next Descriptor View 0 */ #define AT_XDMAC_CNDC_NDVIEW_NDV1 (0x1 << 3) /* Channel x Next Descriptor View 1 */ #define AT_XDMAC_CNDC_NDVIEW_NDV2 (0x2 << 3) /* Channel x Next Descriptor View 2 */ @@ -402,7 +403,8 @@ static void at_xdmac_start_xfer(struct at_xdmac_chan *atchan, */ if (at_xdmac_chan_is_cyclic(atchan)) reg = AT_XDMAC_CNDC_NDVIEW_NDV1; - else if (first->lld.mbr_ubc & AT_XDMAC_MBR_UBC_NDV3) + else if ((first->lld.mbr_ubc & + AT_XDMAC_CNDC_NDVIEW_MASK) == AT_XDMAC_MBR_UBC_NDV3) reg = AT_XDMAC_CNDC_NDVIEW_NDV3; else reg = AT_XDMAC_CNDC_NDVIEW_NDV2; From 912f7c6f7fac273f40e621447cf17d14b50d6e5b Mon Sep 17 00:00:00 2001 From: Tudor Ambarus Date: Wed, 15 Dec 2021 13:01:13 +0200 Subject: [PATCH 228/493] dmaengine: at_xdmac: Fix at_xdmac_lld struct definition The hardware channel next descriptor view structure contains just fields of 32 bits, while dma_addr_t can be of type u64 or u32 depending on CONFIG_ARCH_DMA_ADDR_T_64BIT. Force u32 to comply with what the hardware expects. Fixes: e1f7c9eee707 ("dmaengine: at_xdmac: creation of the atmel eXtended DMA Controller driver") Signed-off-by: Tudor Ambarus Link: https://lore.kernel.org/r/20211215110115.191749-11-tudor.ambarus@microchip.com Signed-off-by: Vinod Koul --- drivers/dma/at_xdmac.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/drivers/dma/at_xdmac.c b/drivers/dma/at_xdmac.c index 6e5bfc9b3825..abe8c4615e65 100644 --- a/drivers/dma/at_xdmac.c +++ b/drivers/dma/at_xdmac.c @@ -253,15 +253,15 @@ struct at_xdmac { /* Linked List Descriptor */ struct at_xdmac_lld { - dma_addr_t mbr_nda; /* Next Descriptor Member */ - u32 mbr_ubc; /* Microblock Control Member */ - dma_addr_t mbr_sa; /* Source Address Member */ - dma_addr_t mbr_da; /* Destination Address Member */ - u32 mbr_cfg; /* Configuration Register */ - u32 mbr_bc; /* Block Control Register */ - u32 mbr_ds; /* Data Stride Register */ - u32 mbr_sus; /* Source Microblock Stride Register */ - u32 mbr_dus; /* Destination Microblock Stride Register */ + u32 mbr_nda; /* Next Descriptor Member */ + u32 mbr_ubc; /* Microblock Control Member */ + u32 mbr_sa; /* Source Address Member */ + u32 mbr_da; /* Destination Address Member */ + u32 mbr_cfg; /* Configuration Register */ + u32 mbr_bc; /* Block Control Register */ + u32 mbr_ds; /* Data Stride Register */ + u32 mbr_sus; /* Source Microblock Stride Register */ + u32 mbr_dus; /* Destination Microblock Stride Register */ }; /* 64-bit alignment needed to update CNDA and CUBC registers in an atomic way. */ From a61210cae80cac0701d5aca9551466a389717fd2 Mon Sep 17 00:00:00 2001 From: Tudor Ambarus Date: Wed, 15 Dec 2021 13:01:14 +0200 Subject: [PATCH 229/493] dmaengine: at_xdmac: Remove a level of indentation in at_xdmac_tasklet() Apart of making the code easier to read, this patch is a prerequisite for a functional change: tasklets run with interrupts enabled, so we need to protect atchan->irq_status with spin_lock_irq() otherwise the tasklet can be interrupted by the IRQ that modifies irq_status. atchan->irq_status will be protected in a further patch. Signed-off-by: Tudor Ambarus Link: https://lore.kernel.org/r/20211215110115.191749-12-tudor.ambarus@microchip.com Signed-off-by: Vinod Koul --- drivers/dma/at_xdmac.c | 70 ++++++++++++++++++++---------------------- 1 file changed, 34 insertions(+), 36 deletions(-) diff --git a/drivers/dma/at_xdmac.c b/drivers/dma/at_xdmac.c index abe8c4615e65..ba727751a9f6 100644 --- a/drivers/dma/at_xdmac.c +++ b/drivers/dma/at_xdmac.c @@ -1667,53 +1667,51 @@ static void at_xdmac_tasklet(struct tasklet_struct *t) { struct at_xdmac_chan *atchan = from_tasklet(atchan, t, tasklet); struct at_xdmac_desc *desc; + struct dma_async_tx_descriptor *txd; u32 error_mask; dev_dbg(chan2dev(&atchan->chan), "%s: status=0x%08x\n", __func__, atchan->irq_status); - error_mask = AT_XDMAC_CIS_RBEIS - | AT_XDMAC_CIS_WBEIS - | AT_XDMAC_CIS_ROIS; + if (at_xdmac_chan_is_cyclic(atchan)) + return at_xdmac_handle_cyclic(atchan); - if (at_xdmac_chan_is_cyclic(atchan)) { - at_xdmac_handle_cyclic(atchan); - } else if ((atchan->irq_status & AT_XDMAC_CIS_LIS) - || (atchan->irq_status & error_mask)) { - struct dma_async_tx_descriptor *txd; + error_mask = AT_XDMAC_CIS_RBEIS | AT_XDMAC_CIS_WBEIS | + AT_XDMAC_CIS_ROIS; - if (atchan->irq_status & error_mask) - at_xdmac_handle_error(atchan); + if (!(atchan->irq_status & AT_XDMAC_CIS_LIS) && + !(atchan->irq_status & error_mask)) + return; - spin_lock_irq(&atchan->lock); - desc = list_first_entry(&atchan->xfers_list, - struct at_xdmac_desc, - xfer_node); - dev_vdbg(chan2dev(&atchan->chan), "%s: desc 0x%p\n", __func__, desc); - if (!desc->active_xfer) { - dev_err(chan2dev(&atchan->chan), "Xfer not active: exiting"); - spin_unlock_irq(&atchan->lock); - return; - } + if (atchan->irq_status & error_mask) + at_xdmac_handle_error(atchan); - txd = &desc->tx_dma_desc; - dma_cookie_complete(txd); - /* Remove the transfer from the transfer list. */ - list_del(&desc->xfer_node); - spin_unlock_irq(&atchan->lock); - - if (txd->flags & DMA_PREP_INTERRUPT) - dmaengine_desc_get_callback_invoke(txd, NULL); - - dma_run_dependencies(txd); - - spin_lock_irq(&atchan->lock); - /* Move the xfer descriptors into the free descriptors list. */ - list_splice_tail_init(&desc->descs_list, - &atchan->free_descs_list); - at_xdmac_advance_work(atchan); + spin_lock_irq(&atchan->lock); + desc = list_first_entry(&atchan->xfers_list, struct at_xdmac_desc, + xfer_node); + dev_vdbg(chan2dev(&atchan->chan), "%s: desc 0x%p\n", __func__, desc); + if (!desc->active_xfer) { + dev_err(chan2dev(&atchan->chan), "Xfer not active: exiting"); spin_unlock_irq(&atchan->lock); + return; } + + txd = &desc->tx_dma_desc; + dma_cookie_complete(txd); + /* Remove the transfer from the transfer list. */ + list_del(&desc->xfer_node); + spin_unlock_irq(&atchan->lock); + + if (txd->flags & DMA_PREP_INTERRUPT) + dmaengine_desc_get_callback_invoke(txd, NULL); + + dma_run_dependencies(txd); + + spin_lock_irq(&atchan->lock); + /* Move the xfer descriptors into the free descriptors list. */ + list_splice_tail_init(&desc->descs_list, &atchan->free_descs_list); + at_xdmac_advance_work(atchan); + spin_unlock_irq(&atchan->lock); } static irqreturn_t at_xdmac_interrupt(int irq, void *dev_id) From e77e561925df2faf77a41896df24a59141a445c9 Mon Sep 17 00:00:00 2001 From: Tudor Ambarus Date: Wed, 15 Dec 2021 13:01:15 +0200 Subject: [PATCH 230/493] dmaengine: at_xdmac: Fix race over irq_status Tasklets run with interrupts enabled, so we need to protect atchan->irq_status with spin_lock_irq() otherwise the tasklet can be interrupted by the IRQ that modifies irq_status. Move the dev_dbg that prints the irq_status in at_xdmac_handle_cyclic() and lower in at_xdmac_tasklet() where the IRQ is disabled. Signed-off-by: Tudor Ambarus Link: https://lore.kernel.org/r/20211215110115.191749-13-tudor.ambarus@microchip.com Signed-off-by: Vinod Koul --- drivers/dma/at_xdmac.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/dma/at_xdmac.c b/drivers/dma/at_xdmac.c index ba727751a9f6..a1da2b4b6d73 100644 --- a/drivers/dma/at_xdmac.c +++ b/drivers/dma/at_xdmac.c @@ -1611,6 +1611,8 @@ static void at_xdmac_handle_cyclic(struct at_xdmac_chan *atchan) struct dma_async_tx_descriptor *txd; spin_lock_irq(&atchan->lock); + dev_dbg(chan2dev(&atchan->chan), "%s: status=0x%08x\n", + __func__, atchan->irq_status); if (list_empty(&atchan->xfers_list)) { spin_unlock_irq(&atchan->lock); return; @@ -1623,6 +1625,7 @@ static void at_xdmac_handle_cyclic(struct at_xdmac_chan *atchan) dmaengine_desc_get_callback_invoke(txd, NULL); } +/* Called with atchan->lock held. */ static void at_xdmac_handle_error(struct at_xdmac_chan *atchan) { struct at_xdmac *atxdmac = to_at_xdmac(atchan->chan.device); @@ -1641,8 +1644,6 @@ static void at_xdmac_handle_error(struct at_xdmac_chan *atchan) if (atchan->irq_status & AT_XDMAC_CIS_ROIS) dev_err(chan2dev(&atchan->chan), "request overflow error!!!"); - spin_lock_irq(&atchan->lock); - /* Channel must be disabled first as it's not done automatically */ at_xdmac_write(atxdmac, AT_XDMAC_GD, atchan->mask); while (at_xdmac_read(atxdmac, AT_XDMAC_GS) & atchan->mask) @@ -1652,8 +1653,6 @@ static void at_xdmac_handle_error(struct at_xdmac_chan *atchan) struct at_xdmac_desc, xfer_node); - spin_unlock_irq(&atchan->lock); - /* Print bad descriptor's details if needed */ dev_dbg(chan2dev(&atchan->chan), "%s: lld: mbr_sa=%pad, mbr_da=%pad, mbr_ubc=0x%08x\n", @@ -1670,15 +1669,17 @@ static void at_xdmac_tasklet(struct tasklet_struct *t) struct dma_async_tx_descriptor *txd; u32 error_mask; - dev_dbg(chan2dev(&atchan->chan), "%s: status=0x%08x\n", - __func__, atchan->irq_status); - if (at_xdmac_chan_is_cyclic(atchan)) return at_xdmac_handle_cyclic(atchan); error_mask = AT_XDMAC_CIS_RBEIS | AT_XDMAC_CIS_WBEIS | AT_XDMAC_CIS_ROIS; + spin_lock_irq(&atchan->lock); + + dev_dbg(chan2dev(&atchan->chan), "%s: status=0x%08x\n", + __func__, atchan->irq_status); + if (!(atchan->irq_status & AT_XDMAC_CIS_LIS) && !(atchan->irq_status & error_mask)) return; @@ -1686,7 +1687,6 @@ static void at_xdmac_tasklet(struct tasklet_struct *t) if (atchan->irq_status & error_mask) at_xdmac_handle_error(atchan); - spin_lock_irq(&atchan->lock); desc = list_first_entry(&atchan->xfers_list, struct at_xdmac_desc, xfer_node); dev_vdbg(chan2dev(&atchan->chan), "%s: desc 0x%p\n", __func__, desc); From c206a389c97c9533971cd05eed69b49f535cc193 Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Tue, 21 Dec 2021 08:20:31 +0100 Subject: [PATCH 231/493] ata: libata: tracepoints for bus-master DMA Add tracepoints for bus-master DMA and taskfile related functions. That allows us to drop the relevant DPRINTK() calls. Signed-off-by: Hannes Reinecke Signed-off-by: Damien Le Moal --- drivers/ata/ata_piix.c | 8 +- drivers/ata/libata-core.c | 6 ++ drivers/ata/libata-sff.c | 44 ++++++----- drivers/ata/libata-trace.c | 18 +++++ drivers/ata/pata_arasan_cf.c | 3 + drivers/ata/pata_octeon_cf.c | 18 ++--- drivers/ata/pata_pdc202xx_old.c | 2 - drivers/ata/pata_sil680.c | 1 - drivers/ata/sata_dwc_460ex.c | 59 +++------------ drivers/ata/sata_nv.c | 12 +-- drivers/ata/sata_rcar.c | 2 - include/trace/events/libata.h | 125 ++++++++++++++++++++++++++++++++ 12 files changed, 201 insertions(+), 97 deletions(-) diff --git a/drivers/ata/ata_piix.c b/drivers/ata/ata_piix.c index eb6bf30bd2e3..27b0d903f91f 100644 --- a/drivers/ata/ata_piix.c +++ b/drivers/ata/ata_piix.c @@ -77,6 +77,7 @@ #include #include #include +#include #define DRV_NAME "ata_piix" #define DRV_VERSION "2.13" @@ -816,10 +817,15 @@ static int piix_sidpr_set_lpm(struct ata_link *link, enum ata_lpm_policy policy, static bool piix_irq_check(struct ata_port *ap) { + unsigned char host_stat; + if (unlikely(!ap->ioaddr.bmdma_addr)) return false; - return ap->ops->bmdma_status(ap) & ATA_DMA_INTR; + host_stat = ap->ops->bmdma_status(ap); + trace_ata_bmdma_status(ap, host_stat); + + return host_stat & ATA_DMA_INTR; } #ifdef CONFIG_PM_SLEEP diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c index 846596fdd831..3db4fd2029ce 100644 --- a/drivers/ata/libata-core.c +++ b/drivers/ata/libata-core.c @@ -6576,3 +6576,9 @@ void ata_print_version(const struct device *dev, const char *version) dev_printk(KERN_DEBUG, dev, "version %s\n", version); } EXPORT_SYMBOL(ata_print_version); + +EXPORT_TRACEPOINT_SYMBOL_GPL(ata_tf_load); +EXPORT_TRACEPOINT_SYMBOL_GPL(ata_exec_command); +EXPORT_TRACEPOINT_SYMBOL_GPL(ata_bmdma_setup); +EXPORT_TRACEPOINT_SYMBOL_GPL(ata_bmdma_start); +EXPORT_TRACEPOINT_SYMBOL_GPL(ata_bmdma_status); diff --git a/drivers/ata/libata-sff.c b/drivers/ata/libata-sff.c index 4cc7c0606e06..76bd87691978 100644 --- a/drivers/ata/libata-sff.c +++ b/drivers/ata/libata-sff.c @@ -18,7 +18,7 @@ #include #include #include - +#include #include "libata.h" static struct workqueue_struct *ata_sff_wq; @@ -409,12 +409,6 @@ void ata_sff_tf_load(struct ata_port *ap, const struct ata_taskfile *tf) iowrite8(tf->hob_lbal, ioaddr->lbal_addr); iowrite8(tf->hob_lbam, ioaddr->lbam_addr); iowrite8(tf->hob_lbah, ioaddr->lbah_addr); - VPRINTK("hob: feat 0x%X nsect 0x%X, lba 0x%X 0x%X 0x%X\n", - tf->hob_feature, - tf->hob_nsect, - tf->hob_lbal, - tf->hob_lbam, - tf->hob_lbah); } if (is_addr) { @@ -423,18 +417,10 @@ void ata_sff_tf_load(struct ata_port *ap, const struct ata_taskfile *tf) iowrite8(tf->lbal, ioaddr->lbal_addr); iowrite8(tf->lbam, ioaddr->lbam_addr); iowrite8(tf->lbah, ioaddr->lbah_addr); - VPRINTK("feat 0x%X nsect 0x%X lba 0x%X 0x%X 0x%X\n", - tf->feature, - tf->nsect, - tf->lbal, - tf->lbam, - tf->lbah); } - if (tf->flags & ATA_TFLAG_DEVICE) { + if (tf->flags & ATA_TFLAG_DEVICE) iowrite8(tf->device, ioaddr->device_addr); - VPRINTK("device 0x%X\n", tf->device); - } ata_wait_idle(ap); } @@ -494,8 +480,6 @@ EXPORT_SYMBOL_GPL(ata_sff_tf_read); */ void ata_sff_exec_command(struct ata_port *ap, const struct ata_taskfile *tf) { - DPRINTK("ata%u: cmd 0x%X\n", ap->print_id, tf->command); - iowrite8(tf->command, ap->ioaddr.command_addr); ata_sff_pause(ap); } @@ -505,6 +489,7 @@ EXPORT_SYMBOL_GPL(ata_sff_exec_command); * ata_tf_to_host - issue ATA taskfile to host controller * @ap: port to which command is being issued * @tf: ATA taskfile register set + * @tag: tag of the associated command * * Issues ATA taskfile register set to ATA host controller, * with proper synchronization with interrupt handler and @@ -514,9 +499,12 @@ EXPORT_SYMBOL_GPL(ata_sff_exec_command); * spin_lock_irqsave(host lock) */ static inline void ata_tf_to_host(struct ata_port *ap, - const struct ata_taskfile *tf) + const struct ata_taskfile *tf, + unsigned int tag) { + trace_ata_tf_load(ap, tf); ap->ops->sff_tf_load(ap, tf); + trace_ata_exec_command(ap, tf, tag); ap->ops->sff_exec_command(ap, tf); } @@ -768,6 +756,7 @@ static void atapi_send_cdb(struct ata_port *ap, struct ata_queued_cmd *qc) case ATAPI_PROT_DMA: ap->hsm_task_state = HSM_ST_LAST; /* initiate bmdma */ + trace_ata_bmdma_start(ap, &qc->tf, qc->tag); ap->ops->bmdma_start(qc); break; #endif /* CONFIG_ATA_BMDMA */ @@ -1376,7 +1365,7 @@ unsigned int ata_sff_qc_issue(struct ata_queued_cmd *qc) if (qc->tf.flags & ATA_TFLAG_POLLING) ata_qc_set_polling(qc); - ata_tf_to_host(ap, &qc->tf); + ata_tf_to_host(ap, &qc->tf, qc->tag); ap->hsm_task_state = HSM_ST_LAST; if (qc->tf.flags & ATA_TFLAG_POLLING) @@ -1388,7 +1377,7 @@ unsigned int ata_sff_qc_issue(struct ata_queued_cmd *qc) if (qc->tf.flags & ATA_TFLAG_POLLING) ata_qc_set_polling(qc); - ata_tf_to_host(ap, &qc->tf); + ata_tf_to_host(ap, &qc->tf, qc->tag); if (qc->tf.flags & ATA_TFLAG_WRITE) { /* PIO data out protocol */ @@ -1418,7 +1407,7 @@ unsigned int ata_sff_qc_issue(struct ata_queued_cmd *qc) if (qc->tf.flags & ATA_TFLAG_POLLING) ata_qc_set_polling(qc); - ata_tf_to_host(ap, &qc->tf); + ata_tf_to_host(ap, &qc->tf, qc->tag); ap->hsm_task_state = HSM_ST_FIRST; @@ -2745,8 +2734,11 @@ unsigned int ata_bmdma_qc_issue(struct ata_queued_cmd *qc) case ATA_PROT_DMA: WARN_ON_ONCE(qc->tf.flags & ATA_TFLAG_POLLING); + trace_ata_tf_load(ap, &qc->tf); ap->ops->sff_tf_load(ap, &qc->tf); /* load tf registers */ + trace_ata_bmdma_setup(ap, &qc->tf, qc->tag); ap->ops->bmdma_setup(qc); /* set up bmdma */ + trace_ata_bmdma_start(ap, &qc->tf, qc->tag); ap->ops->bmdma_start(qc); /* initiate bmdma */ ap->hsm_task_state = HSM_ST_LAST; break; @@ -2754,7 +2746,9 @@ unsigned int ata_bmdma_qc_issue(struct ata_queued_cmd *qc) case ATAPI_PROT_DMA: WARN_ON_ONCE(qc->tf.flags & ATA_TFLAG_POLLING); + trace_ata_tf_load(ap, &qc->tf); ap->ops->sff_tf_load(ap, &qc->tf); /* load tf registers */ + trace_ata_bmdma_setup(ap, &qc->tf, qc->tag); ap->ops->bmdma_setup(qc); /* set up bmdma */ ap->hsm_task_state = HSM_ST_FIRST; @@ -2795,13 +2789,14 @@ unsigned int ata_bmdma_port_intr(struct ata_port *ap, struct ata_queued_cmd *qc) if (ap->hsm_task_state == HSM_ST_LAST && ata_is_dma(qc->tf.protocol)) { /* check status of DMA engine */ host_stat = ap->ops->bmdma_status(ap); - VPRINTK("ata%u: host_stat 0x%X\n", ap->print_id, host_stat); + trace_ata_bmdma_status(ap, host_stat); /* if it's not our irq... */ if (!(host_stat & ATA_DMA_INTR)) return ata_sff_idle_irq(ap); /* before we do anything else, clear DMA-Start bit */ + trace_ata_bmdma_stop(ap, &qc->tf, qc->tag); ap->ops->bmdma_stop(qc); bmdma_stopped = true; @@ -2870,6 +2865,7 @@ void ata_bmdma_error_handler(struct ata_port *ap) u8 host_stat; host_stat = ap->ops->bmdma_status(ap); + trace_ata_bmdma_status(ap, host_stat); /* BMDMA controllers indicate host bus error by * setting DMA_ERR bit and timing out. As it wasn't @@ -2881,6 +2877,7 @@ void ata_bmdma_error_handler(struct ata_port *ap) thaw = true; } + trace_ata_bmdma_stop(ap, &qc->tf, qc->tag); ap->ops->bmdma_stop(qc); /* if we're gonna thaw, make sure IRQ is clear */ @@ -2914,6 +2911,7 @@ void ata_bmdma_post_internal_cmd(struct ata_queued_cmd *qc) if (ata_is_dma(qc->tf.protocol)) { spin_lock_irqsave(ap->lock, flags); + trace_ata_bmdma_stop(ap, &qc->tf, qc->tag); ap->ops->bmdma_stop(qc); spin_unlock_irqrestore(ap->lock, flags); } diff --git a/drivers/ata/libata-trace.c b/drivers/ata/libata-trace.c index 08e001303a82..8a929e4414dc 100644 --- a/drivers/ata/libata-trace.c +++ b/drivers/ata/libata-trace.c @@ -38,6 +38,24 @@ libata_trace_parse_status(struct trace_seq *p, unsigned char status) return ret; } +const char * +libata_trace_parse_host_stat(struct trace_seq *p, unsigned char host_stat) +{ + const char *ret = trace_seq_buffer_ptr(p); + + trace_seq_printf(p, "{ "); + if (host_stat & ATA_DMA_INTR) + trace_seq_printf(p, "INTR "); + if (host_stat & ATA_DMA_ERR) + trace_seq_printf(p, "ERR "); + if (host_stat & ATA_DMA_ACTIVE) + trace_seq_printf(p, "ACTIVE "); + trace_seq_putc(p, '}'); + trace_seq_putc(p, 0); + + return ret; +} + const char * libata_trace_parse_eh_action(struct trace_seq *p, unsigned int eh_action) { diff --git a/drivers/ata/pata_arasan_cf.c b/drivers/ata/pata_arasan_cf.c index 63f39440a9b4..24c3d5e1fca3 100644 --- a/drivers/ata/pata_arasan_cf.c +++ b/drivers/ata/pata_arasan_cf.c @@ -39,6 +39,7 @@ #include #include #include +#include #define DRIVER_NAME "arasan_cf" #define TIMEOUT msecs_to_jiffies(3000) @@ -703,9 +704,11 @@ static unsigned int arasan_cf_qc_issue(struct ata_queued_cmd *qc) case ATA_PROT_DMA: WARN_ON_ONCE(qc->tf.flags & ATA_TFLAG_POLLING); + trace_ata_tf_load(ap, &qc->tf); ap->ops->sff_tf_load(ap, &qc->tf); acdev->dma_status = 0; acdev->qc = qc; + trace_ata_bmdma_start(ap, &qc->tf, qc->tag); arasan_cf_dma_start(acdev); ap->hsm_task_state = HSM_ST_LAST; break; diff --git a/drivers/ata/pata_octeon_cf.c b/drivers/ata/pata_octeon_cf.c index cdc95eb2b2cb..1fe756af3268 100644 --- a/drivers/ata/pata_octeon_cf.c +++ b/drivers/ata/pata_octeon_cf.c @@ -19,7 +19,7 @@ #include #include #include - +#include #include #include @@ -514,20 +514,14 @@ static void octeon_cf_exec_command16(struct ata_port *ap, { /* The base of the registers is at ioaddr.data_addr. */ void __iomem *base = ap->ioaddr.data_addr; - u16 blob; + u16 blob = 0; - if (tf->flags & ATA_TFLAG_DEVICE) { - VPRINTK("device 0x%X\n", tf->device); + if (tf->flags & ATA_TFLAG_DEVICE) blob = tf->device; - } else { - blob = 0; - } - DPRINTK("ata%u: cmd 0x%X\n", ap->print_id, tf->command); blob |= (tf->command << 8); __raw_writew(blob, base + 6); - ata_wait_idle(ap); } @@ -541,12 +535,10 @@ static void octeon_cf_dma_setup(struct ata_queued_cmd *qc) struct octeon_cf_port *cf_port; cf_port = ap->private_data; - DPRINTK("ENTER\n"); /* issue r/w command */ qc->cursg = qc->sg; cf_port->dma_finished = 0; ap->ops->sff_exec_command(ap, &qc->tf); - DPRINTK("EXIT\n"); } /** @@ -699,6 +691,7 @@ static irqreturn_t octeon_cf_interrupt(int irq, void *dev_instance) if (!sg_is_last(qc->cursg)) { qc->cursg = sg_next(qc->cursg); handled = 1; + trace_ata_bmdma_start(ap, &qc->tf, qc->tag); octeon_cf_dma_start(qc); continue; } else { @@ -798,8 +791,11 @@ static unsigned int octeon_cf_qc_issue(struct ata_queued_cmd *qc) case ATA_PROT_DMA: WARN_ON(qc->tf.flags & ATA_TFLAG_POLLING); + trace_ata_tf_load(ap, &qc->tf); ap->ops->sff_tf_load(ap, &qc->tf); /* load tf registers */ + trace_ata_bmdma_setup(ap, &qc->tf, qc->tag); octeon_cf_dma_setup(qc); /* set up dma */ + trace_ata_bmdma_start(ap, &qc->tf, qc->tag); octeon_cf_dma_start(qc); /* initiate dma */ ap->hsm_task_state = HSM_ST_LAST; break; diff --git a/drivers/ata/pata_pdc202xx_old.c b/drivers/ata/pata_pdc202xx_old.c index 0c5cbcd28d0d..b99849095853 100644 --- a/drivers/ata/pata_pdc202xx_old.c +++ b/drivers/ata/pata_pdc202xx_old.c @@ -38,8 +38,6 @@ static int pdc2026x_cable_detect(struct ata_port *ap) static void pdc202xx_exec_command(struct ata_port *ap, const struct ata_taskfile *tf) { - DPRINTK("ata%u: cmd 0x%X\n", ap->print_id, tf->command); - iowrite8(tf->command, ap->ioaddr.command_addr); ndelay(400); } diff --git a/drivers/ata/pata_sil680.c b/drivers/ata/pata_sil680.c index 43215a664b96..81238e097fe2 100644 --- a/drivers/ata/pata_sil680.c +++ b/drivers/ata/pata_sil680.c @@ -212,7 +212,6 @@ static void sil680_set_dmamode(struct ata_port *ap, struct ata_device *adev) static void sil680_sff_exec_command(struct ata_port *ap, const struct ata_taskfile *tf) { - DPRINTK("ata%u: cmd 0x%X\n", ap->print_id, tf->command); iowrite8(tf->command, ap->ioaddr.command_addr); ioread8(ap->ioaddr.bmdma_addr + ATA_DMA_CMD); } diff --git a/drivers/ata/sata_dwc_460ex.c b/drivers/ata/sata_dwc_460ex.c index bd4859563796..c33dc98e0d9d 100644 --- a/drivers/ata/sata_dwc_460ex.c +++ b/drivers/ata/sata_dwc_460ex.c @@ -34,6 +34,7 @@ #include #include #include +#include #include "libata.h" @@ -295,6 +296,7 @@ static const char *get_prot_descript(u8 protocol) } } +#ifdef DEBUG_NCQ static const char *get_dma_dir_descript(int dma_dir) { switch ((enum dma_data_direction)dma_dir) { @@ -308,21 +310,7 @@ static const char *get_dma_dir_descript(int dma_dir) return "none"; } } - -static void sata_dwc_tf_dump(struct ata_port *ap, struct ata_taskfile *tf) -{ - dev_vdbg(ap->dev, - "taskfile cmd: 0x%02x protocol: %s flags: 0x%lx device: %x\n", - tf->command, get_prot_descript(tf->protocol), tf->flags, - tf->device); - dev_vdbg(ap->dev, - "feature: 0x%02x nsect: 0x%x lbal: 0x%x lbam: 0x%x lbah: 0x%x\n", - tf->feature, tf->nsect, tf->lbal, tf->lbam, tf->lbah); - dev_vdbg(ap->dev, - "hob_feature: 0x%02x hob_nsect: 0x%x hob_lbal: 0x%x hob_lbam: 0x%x hob_lbah: 0x%x\n", - tf->hob_feature, tf->hob_nsect, tf->hob_lbal, tf->hob_lbam, - tf->hob_lbah); -} +#endif static void dma_dwc_xfer_done(void *hsdev_instance) { @@ -551,6 +539,7 @@ static irqreturn_t sata_dwc_isr(int irq, void *dev_instance) * active tag. It is the tag that matches the command about to * be completed. */ + trace_ata_bmdma_start(ap, &qc->tf, tag); qc->ap->link.active_tag = tag; sata_dwc_bmdma_start_by_tag(qc, tag); @@ -978,9 +967,6 @@ static void sata_dwc_exec_command_by_tag(struct ata_port *ap, { struct sata_dwc_device_port *hsdevp = HSDEVP_FROM_AP(ap); - dev_dbg(ap->dev, "%s cmd(0x%02x): %s tag=%d\n", __func__, tf->command, - ata_get_cmd_descript(tf->command), tag); - hsdevp->cmd_issued[tag] = cmd_issued; /* @@ -1003,12 +989,9 @@ static void sata_dwc_bmdma_setup(struct ata_queued_cmd *qc) { u8 tag = qc->hw_tag; - if (ata_is_ncq(qc->tf.protocol)) { - dev_dbg(qc->ap->dev, "%s: ap->link.sactive=0x%08x tag=%d\n", - __func__, qc->ap->link.sactive, tag); - } else { + if (!ata_is_ncq(qc->tf.protocol)) tag = 0; - } + sata_dwc_bmdma_setup_by_tag(qc, tag); } @@ -1035,12 +1018,6 @@ static void sata_dwc_bmdma_start_by_tag(struct ata_queued_cmd *qc, u8 tag) start_dma = 0; } - dev_dbg(ap->dev, - "%s qc=%p tag: %x cmd: 0x%02x dma_dir: %s start_dma? %x\n", - __func__, qc, tag, qc->tf.command, - get_dma_dir_descript(qc->dma_dir), start_dma); - sata_dwc_tf_dump(ap, &qc->tf); - if (start_dma) { sata_dwc_scr_read(&ap->link, SCR_ERROR, ®); if (reg & SATA_DWC_SERROR_ERR_BITS) { @@ -1065,13 +1042,9 @@ static void sata_dwc_bmdma_start(struct ata_queued_cmd *qc) { u8 tag = qc->hw_tag; - if (ata_is_ncq(qc->tf.protocol)) { - dev_dbg(qc->ap->dev, "%s: ap->link.sactive=0x%08x tag=%d\n", - __func__, qc->ap->link.sactive, tag); - } else { + if (!ata_is_ncq(qc->tf.protocol)) tag = 0; - } - dev_dbg(qc->ap->dev, "%s\n", __func__); + sata_dwc_bmdma_start_by_tag(qc, tag); } @@ -1082,16 +1055,6 @@ static unsigned int sata_dwc_qc_issue(struct ata_queued_cmd *qc) struct ata_port *ap = qc->ap; struct sata_dwc_device_port *hsdevp = HSDEVP_FROM_AP(ap); -#ifdef DEBUG_NCQ - if (qc->hw_tag > 0 || ap->link.sactive > 1) - dev_info(ap->dev, - "%s ap id=%d cmd(0x%02x)=%s qc tag=%d prot=%s ap active_tag=0x%08x ap sactive=0x%08x\n", - __func__, ap->print_id, qc->tf.command, - ata_get_cmd_descript(qc->tf.command), - qc->hw_tag, get_prot_descript(qc->tf.protocol), - ap->link.active_tag, ap->link.sactive); -#endif - if (!ata_is_ncq(qc->tf.protocol)) tag = 0; @@ -1108,11 +1071,9 @@ static unsigned int sata_dwc_qc_issue(struct ata_queued_cmd *qc) sactive |= (0x00000001 << tag); sata_dwc_scr_write(&ap->link, SCR_ACTIVE, sactive); - dev_dbg(qc->ap->dev, - "%s: tag=%d ap->link.sactive = 0x%08x sactive=0x%08x\n", - __func__, tag, qc->ap->link.sactive, sactive); - + trace_ata_tf_load(ap, &qc->tf); ap->ops->sff_tf_load(ap, &qc->tf); + trace_ata_exec_command(ap, &qc->tf, tag); sata_dwc_exec_command_by_tag(ap, &qc->tf, tag, SATA_DWC_CMD_ISSUED_PEND); } else { diff --git a/drivers/ata/sata_nv.c b/drivers/ata/sata_nv.c index 3c70405a0b80..06d381b9764e 100644 --- a/drivers/ata/sata_nv.c +++ b/drivers/ata/sata_nv.c @@ -31,6 +31,7 @@ #include #include #include +#include #define DRV_NAME "sata_nv" #define DRV_VERSION "3.5" @@ -1434,8 +1435,6 @@ static unsigned int nv_adma_qc_issue(struct ata_queued_cmd *qc) writew(qc->hw_tag, mmio + NV_ADMA_APPEND); - DPRINTK("Issued tag %u\n", qc->hw_tag); - return 0; } @@ -2013,19 +2012,17 @@ static unsigned int nv_swncq_issue_atacmd(struct ata_port *ap, if (qc == NULL) return 0; - DPRINTK("Enter\n"); - writel((1 << qc->hw_tag), pp->sactive_block); pp->last_issue_tag = qc->hw_tag; pp->dhfis_bits &= ~(1 << qc->hw_tag); pp->dmafis_bits &= ~(1 << qc->hw_tag); pp->qc_active |= (0x1 << qc->hw_tag); + trace_ata_tf_load(ap, &qc->tf); ap->ops->sff_tf_load(ap, &qc->tf); /* load tf registers */ + trace_ata_exec_command(ap, &qc->tf, qc->hw_tag); ap->ops->sff_exec_command(ap, &qc->tf); - DPRINTK("Issued tag %u\n", qc->hw_tag); - return 0; } @@ -2037,8 +2034,6 @@ static unsigned int nv_swncq_qc_issue(struct ata_queued_cmd *qc) if (qc->tf.protocol != ATA_PROT_NCQ) return ata_bmdma_qc_issue(qc); - DPRINTK("Enter\n"); - if (!pp->qc_active) nv_swncq_issue_atacmd(ap, qc); else @@ -2083,6 +2078,7 @@ static int nv_swncq_sdbfis(struct ata_port *ap) u8 lack_dhfis = 0; host_stat = ap->ops->bmdma_status(ap); + trace_ata_bmdma_status(ap, host_stat); if (unlikely(host_stat & ATA_DMA_ERR)) { /* error when transferring data to/from memory */ ata_ehi_clear_desc(ehi); diff --git a/drivers/ata/sata_rcar.c b/drivers/ata/sata_rcar.c index 9005833ab02f..b4994d182eda 100644 --- a/drivers/ata/sata_rcar.c +++ b/drivers/ata/sata_rcar.c @@ -436,8 +436,6 @@ static void sata_rcar_tf_read(struct ata_port *ap, struct ata_taskfile *tf) static void sata_rcar_exec_command(struct ata_port *ap, const struct ata_taskfile *tf) { - DPRINTK("ata%u: cmd 0x%X\n", ap->print_id, tf->command); - iowrite32(tf->command, ap->ioaddr.command_addr); ata_sff_pause(ap); } diff --git a/include/trace/events/libata.h b/include/trace/events/libata.h index 2394fc2b2831..acb9a4fc18ed 100644 --- a/include/trace/events/libata.h +++ b/include/trace/events/libata.h @@ -151,6 +151,9 @@ const char *libata_trace_parse_status(struct trace_seq*, unsigned char); #define __parse_status(s) libata_trace_parse_status(p, s) +const char *libata_trace_parse_host_stat(struct trace_seq *, unsigned char); +#define __parse_host_stat(s) libata_trace_parse_host_stat(p, s) + const char *libata_trace_parse_eh_action(struct trace_seq *, unsigned int); #define __parse_eh_action(a) libata_trace_parse_eh_action(p, a) @@ -299,6 +302,128 @@ DEFINE_EVENT(ata_qc_complete_template, ata_qc_complete_done, TP_PROTO(struct ata_queued_cmd *qc), TP_ARGS(qc)); +TRACE_EVENT(ata_tf_load, + + TP_PROTO(struct ata_port *ap, const struct ata_taskfile *tf), + + TP_ARGS(ap, tf), + + TP_STRUCT__entry( + __field( unsigned int, ata_port ) + __field( unsigned char, cmd ) + __field( unsigned char, dev ) + __field( unsigned char, lbal ) + __field( unsigned char, lbam ) + __field( unsigned char, lbah ) + __field( unsigned char, nsect ) + __field( unsigned char, feature ) + __field( unsigned char, hob_lbal ) + __field( unsigned char, hob_lbam ) + __field( unsigned char, hob_lbah ) + __field( unsigned char, hob_nsect ) + __field( unsigned char, hob_feature ) + __field( unsigned char, proto ) + ), + + TP_fast_assign( + __entry->ata_port = ap->print_id; + __entry->proto = tf->protocol; + __entry->cmd = tf->command; + __entry->dev = tf->device; + __entry->lbal = tf->lbal; + __entry->lbam = tf->lbam; + __entry->lbah = tf->lbah; + __entry->hob_lbal = tf->hob_lbal; + __entry->hob_lbam = tf->hob_lbam; + __entry->hob_lbah = tf->hob_lbah; + __entry->feature = tf->feature; + __entry->hob_feature = tf->hob_feature; + __entry->nsect = tf->nsect; + __entry->hob_nsect = tf->hob_nsect; + ), + + TP_printk("ata_port=%u proto=%s cmd=%s%s " \ + " tf=(%02x/%02x:%02x:%02x:%02x:%02x/%02x:%02x:%02x:%02x:%02x/%02x)", + __entry->ata_port, + show_protocol_name(__entry->proto), + show_opcode_name(__entry->cmd), + __parse_subcmd(__entry->cmd, __entry->feature, __entry->hob_nsect), + __entry->cmd, __entry->feature, __entry->nsect, + __entry->lbal, __entry->lbam, __entry->lbah, + __entry->hob_feature, __entry->hob_nsect, + __entry->hob_lbal, __entry->hob_lbam, __entry->hob_lbah, + __entry->dev) +); + +DECLARE_EVENT_CLASS(ata_exec_command_template, + + TP_PROTO(struct ata_port *ap, const struct ata_taskfile *tf, unsigned int tag), + + TP_ARGS(ap, tf, tag), + + TP_STRUCT__entry( + __field( unsigned int, ata_port ) + __field( unsigned int, tag ) + __field( unsigned char, cmd ) + __field( unsigned char, feature ) + __field( unsigned char, hob_nsect ) + __field( unsigned char, proto ) + ), + + TP_fast_assign( + __entry->ata_port = ap->print_id; + __entry->tag = tag; + __entry->proto = tf->protocol; + __entry->cmd = tf->command; + __entry->feature = tf->feature; + __entry->hob_nsect = tf->hob_nsect; + ), + + TP_printk("ata_port=%u tag=%d proto=%s cmd=%s%s", + __entry->ata_port, __entry->tag, + show_protocol_name(__entry->proto), + show_opcode_name(__entry->cmd), + __parse_subcmd(__entry->cmd, __entry->feature, __entry->hob_nsect)) +); + +DEFINE_EVENT(ata_exec_command_template, ata_exec_command, + TP_PROTO(struct ata_port *ap, const struct ata_taskfile *tf, unsigned int tag), + TP_ARGS(ap, tf, tag)); + +DEFINE_EVENT(ata_exec_command_template, ata_bmdma_setup, + TP_PROTO(struct ata_port *ap, const struct ata_taskfile *tf, unsigned int tag), + TP_ARGS(ap, tf, tag)); + +DEFINE_EVENT(ata_exec_command_template, ata_bmdma_start, + TP_PROTO(struct ata_port *ap, const struct ata_taskfile *tf, unsigned int tag), + TP_ARGS(ap, tf, tag)); + +DEFINE_EVENT(ata_exec_command_template, ata_bmdma_stop, + TP_PROTO(struct ata_port *ap, const struct ata_taskfile *tf, unsigned int tag), + TP_ARGS(ap, tf, tag)); + +TRACE_EVENT(ata_bmdma_status, + + TP_PROTO(struct ata_port *ap, unsigned int host_stat), + + TP_ARGS(ap, host_stat), + + TP_STRUCT__entry( + __field( unsigned int, ata_port ) + __field( unsigned int, tag ) + __field( unsigned char, host_stat ) + ), + + TP_fast_assign( + __entry->ata_port = ap->print_id; + __entry->host_stat = host_stat; + ), + + TP_printk("ata_port=%u host_stat=%s", + __entry->ata_port, + __parse_host_stat(__entry->host_stat)) +); + TRACE_EVENT(ata_eh_link_autopsy, TP_PROTO(struct ata_device *dev, unsigned int eh_action, unsigned int eh_err_mask), From 7fad6ad6a357c73f0bdf55476238ae2884de78a3 Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Tue, 21 Dec 2021 08:20:32 +0100 Subject: [PATCH 232/493] ata: libata-sff: tracepoints for HSM state machine Add tracepoints for the HSM state machine and drop DPRINTK calls Signed-off-by: Hannes Reinecke Signed-off-by: Damien Le Moal --- drivers/ata/libata-sff.c | 20 ++---- drivers/ata/libata-trace.c | 29 ++++++++ include/trace/events/libata.h | 125 ++++++++++++++++++++++++++++++++++ 3 files changed, 161 insertions(+), 13 deletions(-) diff --git a/drivers/ata/libata-sff.c b/drivers/ata/libata-sff.c index 76bd87691978..b544bdc6d0a3 100644 --- a/drivers/ata/libata-sff.c +++ b/drivers/ata/libata-sff.c @@ -668,7 +668,7 @@ static void ata_pio_sector(struct ata_queued_cmd *qc) page = nth_page(page, (offset >> PAGE_SHIFT)); offset %= PAGE_SIZE; - DPRINTK("data %s\n", qc->tf.flags & ATA_TFLAG_WRITE ? "write" : "read"); + trace_ata_sff_pio_transfer_data(qc, offset, qc->sect_size); /* * Split the transfer when it splits a page boundary. Note that the @@ -738,7 +738,7 @@ static void ata_pio_sectors(struct ata_queued_cmd *qc) static void atapi_send_cdb(struct ata_port *ap, struct ata_queued_cmd *qc) { /* send SCSI cdb */ - DPRINTK("send cdb\n"); + trace_atapi_send_cdb(qc, 0, qc->dev->cdb_len); WARN_ON_ONCE(qc->dev->cdb_len < 12); ap->ops->sff_data_xfer(qc, qc->cdb, qc->dev->cdb_len, 1); @@ -809,7 +809,7 @@ next_sg: /* don't cross page boundaries */ count = min(count, (unsigned int)PAGE_SIZE - offset); - DPRINTK("data %s\n", qc->tf.flags & ATA_TFLAG_WRITE ? "write" : "read"); + trace_atapi_pio_transfer_data(qc, offset, count); /* do the actual data transfer */ buf = kmap_atomic(page); @@ -991,8 +991,7 @@ int ata_sff_hsm_move(struct ata_port *ap, struct ata_queued_cmd *qc, WARN_ON_ONCE(in_wq != ata_hsm_ok_in_wq(ap, qc)); fsm_start: - DPRINTK("ata%u: protocol %d task_state %d (dev_stat 0x%X)\n", - ap->print_id, qc->tf.protocol, ap->hsm_task_state, status); + trace_ata_sff_hsm_state(qc, status); switch (ap->hsm_task_state) { case HSM_ST_FIRST: @@ -1193,8 +1192,7 @@ fsm_start: } /* no more data to transfer */ - DPRINTK("ata%u: dev %u command complete, drv_stat 0x%x\n", - ap->print_id, qc->dev->devno, status); + trace_ata_sff_hsm_command_complete(qc, status); WARN_ON_ONCE(qc->err_mask & (AC_ERR_DEV | AC_ERR_HSM)); @@ -1251,7 +1249,7 @@ EXPORT_SYMBOL_GPL(ata_sff_queue_pio_task); void ata_sff_flush_pio_task(struct ata_port *ap) { - DPRINTK("ENTER\n"); + trace_ata_sff_flush_pio_task(ap); cancel_delayed_work_sync(&ap->sff_pio_task); @@ -1268,9 +1266,6 @@ void ata_sff_flush_pio_task(struct ata_port *ap) spin_unlock_irq(ap->lock); ap->sff_pio_task_link = NULL; - - if (ata_msg_ctl(ap)) - ata_port_dbg(ap, "%s: EXIT\n", __func__); } static void ata_sff_pio_task(struct work_struct *work) @@ -1467,8 +1462,7 @@ static unsigned int __ata_sff_port_intr(struct ata_port *ap, { u8 status; - VPRINTK("ata%u: protocol %d task_state %d\n", - ap->print_id, qc->tf.protocol, ap->hsm_task_state); + trace_ata_sff_port_intr(qc, hsmv_on_idle); /* Check whether we are expecting interrupt in this state */ switch (ap->hsm_task_state) { diff --git a/drivers/ata/libata-trace.c b/drivers/ata/libata-trace.c index 8a929e4414dc..e0e4d0d5a100 100644 --- a/drivers/ata/libata-trace.c +++ b/drivers/ata/libata-trace.c @@ -155,6 +155,35 @@ libata_trace_parse_qc_flags(struct trace_seq *p, unsigned int qc_flags) return ret; } +const char * +libata_trace_parse_tf_flags(struct trace_seq *p, unsigned int tf_flags) +{ + const char *ret = trace_seq_buffer_ptr(p); + + trace_seq_printf(p, "%x", tf_flags); + if (tf_flags) { + trace_seq_printf(p, "{ "); + if (tf_flags & ATA_TFLAG_LBA48) + trace_seq_printf(p, "LBA48 "); + if (tf_flags & ATA_TFLAG_ISADDR) + trace_seq_printf(p, "ISADDR "); + if (tf_flags & ATA_TFLAG_DEVICE) + trace_seq_printf(p, "DEV "); + if (tf_flags & ATA_TFLAG_WRITE) + trace_seq_printf(p, "WRITE "); + if (tf_flags & ATA_TFLAG_LBA) + trace_seq_printf(p, "LBA "); + if (tf_flags & ATA_TFLAG_FUA) + trace_seq_printf(p, "FUA "); + if (tf_flags & ATA_TFLAG_POLLING) + trace_seq_printf(p, "POLL "); + trace_seq_putc(p, '}'); + } + trace_seq_putc(p, 0); + + return ret; +} + const char * libata_trace_parse_subcmd(struct trace_seq *p, unsigned char cmd, unsigned char feature, unsigned char hob_nsect) diff --git a/include/trace/events/libata.h b/include/trace/events/libata.h index acb9a4fc18ed..fcb8fde39614 100644 --- a/include/trace/events/libata.h +++ b/include/trace/events/libata.h @@ -148,6 +148,15 @@ ata_class_name(ATA_DEV_ZAC_UNSUP), \ ata_class_name(ATA_DEV_NONE)) +#define ata_sff_hsm_state_name(state) { state, #state } +#define show_sff_hsm_state_name(val) \ + __print_symbolic(val, \ + ata_sff_hsm_state_name(HSM_ST_IDLE), \ + ata_sff_hsm_state_name(HSM_ST_FIRST), \ + ata_sff_hsm_state_name(HSM_ST), \ + ata_sff_hsm_state_name(HSM_ST_LAST), \ + ata_sff_hsm_state_name(HSM_ST_ERR)) + const char *libata_trace_parse_status(struct trace_seq*, unsigned char); #define __parse_status(s) libata_trace_parse_status(p, s) @@ -163,6 +172,9 @@ const char *libata_trace_parse_eh_err_mask(struct trace_seq *, unsigned int); const char *libata_trace_parse_qc_flags(struct trace_seq *, unsigned int); #define __parse_qc_flags(f) libata_trace_parse_qc_flags(p, f) +const char *libata_trace_parse_tf_flags(struct trace_seq *, unsigned int); +#define __parse_tf_flags(f) libata_trace_parse_tf_flags(p, f) + const char *libata_trace_parse_subcmd(struct trace_seq *, unsigned char, unsigned char, unsigned char); #define __parse_subcmd(c,f,h) libata_trace_parse_subcmd(p, c, f, h) @@ -558,6 +570,119 @@ DEFINE_EVENT(ata_link_reset_end_template, ata_slave_postreset, TP_PROTO(struct ata_link *link, unsigned int *class, int rc), TP_ARGS(link, class, rc)); +DECLARE_EVENT_CLASS(ata_sff_hsm_template, + + TP_PROTO(struct ata_queued_cmd *qc, unsigned char status), + + TP_ARGS(qc, status), + + TP_STRUCT__entry( + __field( unsigned int, ata_port ) + __field( unsigned int, ata_dev ) + __field( unsigned int, tag ) + __field( unsigned int, qc_flags ) + __field( unsigned int, protocol ) + __field( unsigned int, hsm_state ) + __field( unsigned char, dev_state ) + ), + + TP_fast_assign( + __entry->ata_port = qc->ap->print_id; + __entry->ata_dev = qc->dev->link->pmp + qc->dev->devno; + __entry->tag = qc->tag; + __entry->qc_flags = qc->flags; + __entry->protocol = qc->tf.protocol; + __entry->hsm_state = qc->ap->hsm_task_state; + __entry->dev_state = status; + ), + + TP_printk("ata_port=%u ata_dev=%u tag=%d proto=%s flags=%s task_state=%s dev_stat=0x%X", + __entry->ata_port, __entry->ata_dev, __entry->tag, + show_protocol_name(__entry->protocol), + __parse_qc_flags(__entry->qc_flags), + show_sff_hsm_state_name(__entry->hsm_state), + __entry->dev_state) +); + +DEFINE_EVENT(ata_sff_hsm_template, ata_sff_hsm_state, + TP_PROTO(struct ata_queued_cmd *qc, unsigned char state), + TP_ARGS(qc, state)); + +DEFINE_EVENT(ata_sff_hsm_template, ata_sff_hsm_command_complete, + TP_PROTO(struct ata_queued_cmd *qc, unsigned char state), + TP_ARGS(qc, state)); + +DEFINE_EVENT(ata_sff_hsm_template, ata_sff_port_intr, + TP_PROTO(struct ata_queued_cmd *qc, unsigned char state), + TP_ARGS(qc, state)); + +DECLARE_EVENT_CLASS(ata_transfer_data_template, + + TP_PROTO(struct ata_queued_cmd *qc, unsigned int offset, unsigned int count), + + TP_ARGS(qc, offset, count), + + TP_STRUCT__entry( + __field( unsigned int, ata_port ) + __field( unsigned int, ata_dev ) + __field( unsigned int, tag ) + __field( unsigned int, flags ) + __field( unsigned int, offset ) + __field( unsigned int, bytes ) + ), + + TP_fast_assign( + __entry->ata_port = qc->ap->print_id; + __entry->ata_dev = qc->dev->link->pmp + qc->dev->devno; + __entry->tag = qc->tag; + __entry->flags = qc->tf.flags; + __entry->offset = offset; + __entry->bytes = count; + ), + + TP_printk("ata_port=%u ata_dev=%u tag=%d flags=%s offset=%u bytes=%u", + __entry->ata_port, __entry->ata_dev, __entry->tag, + __parse_tf_flags(__entry->flags), + __entry->offset, __entry->bytes) +); + +DEFINE_EVENT(ata_transfer_data_template, ata_sff_pio_transfer_data, + TP_PROTO(struct ata_queued_cmd *qc, unsigned int offset, unsigned int count), + TP_ARGS(qc, offset, count)); + +DEFINE_EVENT(ata_transfer_data_template, atapi_pio_transfer_data, + TP_PROTO(struct ata_queued_cmd *qc, unsigned int offset, unsigned int count), + TP_ARGS(qc, offset, count)); + +DEFINE_EVENT(ata_transfer_data_template, atapi_send_cdb, + TP_PROTO(struct ata_queued_cmd *qc, unsigned int offset, unsigned int count), + TP_ARGS(qc, offset, count)); + +DECLARE_EVENT_CLASS(ata_sff_template, + + TP_PROTO(struct ata_port *ap), + + TP_ARGS(ap), + + TP_STRUCT__entry( + __field( unsigned int, ata_port ) + __field( unsigned char, hsm_state ) + ), + + TP_fast_assign( + __entry->ata_port = ap->print_id; + __entry->hsm_state = ap->hsm_task_state; + ), + + TP_printk("ata_port=%u task_state=%s", + __entry->ata_port, + show_sff_hsm_state_name(__entry->hsm_state)) +); + +DEFINE_EVENT(ata_sff_template, ata_sff_flush_pio_task, + TP_PROTO(struct ata_port *ap), + TP_ARGS(ap)); + #endif /* _TRACE_LIBATA_H */ /* This part must be outside protection */ From 1fe9fb71b2ffcedd794daacf4db2056a6cb5199e Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Tue, 21 Dec 2021 08:20:33 +0100 Subject: [PATCH 233/493] ata: libata-scsi: drop DPRINTK calls for cdb translation Drop DPRINTK calls for cdb translation as they are already covered by other traces, and also drop the DPRINTK calls in ata_scsi_hotplug(). Signed-off-by: Hannes Reinecke Signed-off-by: Damien Le Moal --- drivers/ata/libata-scsi.c | 20 +------------------- 1 file changed, 1 insertion(+), 19 deletions(-) diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c index b0b7ab46a03c..bfbb4cca4c17 100644 --- a/drivers/ata/libata-scsi.c +++ b/drivers/ata/libata-scsi.c @@ -1469,9 +1469,6 @@ static unsigned int ata_scsi_verify_xlat(struct ata_queued_cmd *qc) head = track % dev->heads; sect = (u32)block % dev->sectors + 1; - DPRINTK("block %u track %u cyl %u head %u sect %u\n", - (u32)block, track, cyl, head, sect); - /* Check whether the converted CHS can fit. Cylinder: 0-65535 Head: 0-15 @@ -1594,7 +1591,6 @@ static unsigned int ata_scsi_rw_xlat(struct ata_queued_cmd *qc) goto invalid_fld; break; default: - DPRINTK("no-byte command\n"); fp = 0; goto invalid_fld; } @@ -1748,7 +1744,6 @@ static int ata_scsi_translate(struct ata_device *dev, struct scsi_cmnd *cmd, early_finish: ata_qc_free(qc); scsi_done(cmd); - DPRINTK("EXIT - early finish (good or error)\n"); return 0; err_did: @@ -1756,12 +1751,10 @@ err_did: cmd->result = (DID_ERROR << 16); scsi_done(cmd); err_mem: - DPRINTK("EXIT - internal\n"); return 0; defer: ata_qc_free(qc); - DPRINTK("EXIT - defer\n"); if (rc == ATA_DEFER_LINK) return SCSI_MLQUEUE_DEVICE_BUSY; else @@ -2512,8 +2505,6 @@ static void atapi_request_sense(struct ata_queued_cmd *qc) struct ata_port *ap = qc->ap; struct scsi_cmnd *cmd = qc->scsicmd; - DPRINTK("ATAPI request sense\n"); - memset(cmd->sense_buffer, 0, SCSI_SENSE_BUFFERSIZE); #ifdef CONFIG_ATA_SFF @@ -2552,8 +2543,6 @@ static void atapi_request_sense(struct ata_queued_cmd *qc) qc->complete_fn = atapi_sense_complete; ata_qc_issue(qc); - - DPRINTK("EXIT\n"); } /* @@ -2663,7 +2652,6 @@ static unsigned int atapi_xlat(struct ata_queued_cmd *qc) qc->tf.flags |= ATA_TFLAG_ISADDR | ATA_TFLAG_DEVICE; if (scmd->sc_data_direction == DMA_TO_DEVICE) { qc->tf.flags |= ATA_TFLAG_WRITE; - DPRINTK("direction: write\n"); } qc->tf.command = ATA_CMD_PACKET; @@ -4053,8 +4041,6 @@ int __ata_scsi_queuecmd(struct scsi_cmnd *scmd, struct ata_device *dev) return rc; bad_cdb_len: - DPRINTK("bad CDB len=%u, scsi_op=0x%02x, max=%u\n", - scmd->cmd_len, scsi_op, dev->cdb_len); scmd->result = DID_ERROR << 16; scsi_done(scmd); return 0; @@ -4525,12 +4511,9 @@ void ata_scsi_hotplug(struct work_struct *work) container_of(work, struct ata_port, hotplug_task.work); int i; - if (ap->pflags & ATA_PFLAG_UNLOADING) { - DPRINTK("ENTER/EXIT - unloading\n"); + if (ap->pflags & ATA_PFLAG_UNLOADING) return; - } - DPRINTK("ENTER\n"); mutex_lock(&ap->scsi_scan_mutex); /* Unplug detached devices. We cannot use link iterator here @@ -4546,7 +4529,6 @@ void ata_scsi_hotplug(struct work_struct *work) ata_scsi_scan_host(ap, 0); mutex_unlock(&ap->scsi_scan_mutex); - DPRINTK("EXIT\n"); } /** From c318458c9359ce3d10943b0c15a9b9f43dda7b2e Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Tue, 21 Dec 2021 08:20:34 +0100 Subject: [PATCH 234/493] ata: libata: add tracepoints for ATA error handling Add tracepoints for ATA error handling. Signed-off-by: Hannes Reinecke Signed-off-by: Damien Le Moal --- drivers/ata/libata-eh.c | 26 ++++----------- drivers/ata/libata-pmp.c | 8 ----- drivers/ata/libata-sata.c | 3 -- include/trace/events/libata.h | 60 +++++++++++++++++++++++++++++++++++ 4 files changed, 67 insertions(+), 30 deletions(-) diff --git a/drivers/ata/libata-eh.c b/drivers/ata/libata-eh.c index 043a1c846f2c..69f51616d8bd 100644 --- a/drivers/ata/libata-eh.c +++ b/drivers/ata/libata-eh.c @@ -533,8 +533,6 @@ void ata_scsi_error(struct Scsi_Host *host) unsigned long flags; LIST_HEAD(eh_work_q); - DPRINTK("ENTER\n"); - spin_lock_irqsave(host->host_lock, flags); list_splice_init(&host->eh_cmd_q, &eh_work_q); spin_unlock_irqrestore(host->host_lock, flags); @@ -548,7 +546,6 @@ void ata_scsi_error(struct Scsi_Host *host) /* finish or retry handled scmd's and clean up */ WARN_ON(!list_empty(&eh_work_q)); - DPRINTK("EXIT\n"); } /** @@ -940,7 +937,7 @@ void ata_std_sched_eh(struct ata_port *ap) ata_eh_set_pending(ap, 1); scsi_schedule_eh(ap->scsi_host); - DPRINTK("port EH scheduled\n"); + trace_ata_std_sched_eh(ap); } EXPORT_SYMBOL_GPL(ata_std_sched_eh); @@ -1070,7 +1067,7 @@ static void __ata_port_freeze(struct ata_port *ap) ap->pflags |= ATA_PFLAG_FROZEN; - DPRINTK("ata%u port frozen\n", ap->print_id); + trace_ata_port_freeze(ap); } /** @@ -1147,7 +1144,7 @@ void ata_eh_thaw_port(struct ata_port *ap) spin_unlock_irqrestore(ap->lock, flags); - DPRINTK("ata%u port thawed\n", ap->print_id); + trace_ata_port_thaw(ap); } static void ata_eh_scsidone(struct scsi_cmnd *scmd) @@ -1287,6 +1284,8 @@ void ata_eh_about_to_do(struct ata_link *link, struct ata_device *dev, struct ata_eh_context *ehc = &link->eh_context; unsigned long flags; + trace_ata_eh_about_to_do(link, dev ? dev->devno : 0, action); + spin_lock_irqsave(ap->lock, flags); ata_eh_clear_action(link, dev, ehi, action); @@ -1317,6 +1316,8 @@ void ata_eh_done(struct ata_link *link, struct ata_device *dev, { struct ata_eh_context *ehc = &link->eh_context; + trace_ata_eh_done(link, dev ? dev->devno : 0, action); + ata_eh_clear_action(link, dev, &ehc->i, action); } @@ -1421,8 +1422,6 @@ static void ata_eh_request_sense(struct ata_queued_cmd *qc, return; } - DPRINTK("ATA request sense\n"); - ata_tf_init(dev, &tf); tf.flags |= ATA_TFLAG_ISADDR | ATA_TFLAG_DEVICE; tf.flags |= ATA_TFLAG_LBA | ATA_TFLAG_LBA48; @@ -1463,8 +1462,6 @@ unsigned int atapi_eh_request_sense(struct ata_device *dev, struct ata_port *ap = dev->link->ap; struct ata_taskfile tf; - DPRINTK("ATAPI request sense\n"); - memset(sense_buf, 0, SCSI_SENSE_BUFFERSIZE); /* initialize sense_buf with the error register, @@ -1928,8 +1925,6 @@ static void ata_eh_link_autopsy(struct ata_link *link) u32 serror; int rc; - DPRINTK("ENTER\n"); - if (ehc->i.flags & ATA_EHI_NO_AUTOPSY) return; @@ -2036,7 +2031,6 @@ static void ata_eh_link_autopsy(struct ata_link *link) ehc->i.action |= ata_eh_speed_down(dev, eflags, all_err_mask); trace_ata_eh_link_autopsy(dev, ehc->i.action, all_err_mask); } - DPRINTK("EXIT\n"); } /** @@ -2936,8 +2930,6 @@ static int ata_eh_revalidate_and_attach(struct ata_link *link, unsigned long flags; int rc = 0; - DPRINTK("ENTER\n"); - /* For PATA drive side cable detection to work, IDENTIFY must * be done backwards such that PDIAG- is released by the slave * device before the master device is identified. @@ -3051,7 +3043,6 @@ static int ata_eh_revalidate_and_attach(struct ata_link *link, err: *r_failed_dev = dev; - DPRINTK("EXIT rc=%d\n", rc); return rc; } @@ -3566,8 +3557,6 @@ int ata_eh_recover(struct ata_port *ap, ata_prereset_fn_t prereset, int rc, nr_fails; unsigned long flags, deadline; - DPRINTK("ENTER\n"); - /* prep for recovery */ ata_for_each_link(link, ap, EDGE) { struct ata_eh_context *ehc = &link->eh_context; @@ -3775,7 +3764,6 @@ int ata_eh_recover(struct ata_port *ap, ata_prereset_fn_t prereset, if (rc && r_failed_link) *r_failed_link = link; - DPRINTK("EXIT, rc=%d\n", rc); return rc; } diff --git a/drivers/ata/libata-pmp.c b/drivers/ata/libata-pmp.c index ba7be3f38617..e2e9cbd405fa 100644 --- a/drivers/ata/libata-pmp.c +++ b/drivers/ata/libata-pmp.c @@ -652,8 +652,6 @@ static int sata_pmp_revalidate(struct ata_device *dev, unsigned int new_class) u32 *gscr = (void *)ap->sector_buf; int rc; - DPRINTK("ENTER\n"); - ata_eh_about_to_do(link, NULL, ATA_EH_REVALIDATE); if (!ata_dev_enabled(dev)) { @@ -686,12 +684,10 @@ static int sata_pmp_revalidate(struct ata_device *dev, unsigned int new_class) ata_eh_done(link, NULL, ATA_EH_REVALIDATE); - DPRINTK("EXIT, rc=0\n"); return 0; fail: ata_dev_err(dev, "PMP revalidation failed (errno=%d)\n", rc); - DPRINTK("EXIT, rc=%d\n", rc); return rc; } @@ -759,8 +755,6 @@ static int sata_pmp_eh_recover_pmp(struct ata_port *ap, int detach = 0, rc = 0; int reval_failed = 0; - DPRINTK("ENTER\n"); - if (dev->flags & ATA_DFLAG_DETACH) { detach = 1; rc = -ENODEV; @@ -828,7 +822,6 @@ static int sata_pmp_eh_recover_pmp(struct ata_port *ap, /* okay, PMP resurrected */ ehc->i.flags = 0; - DPRINTK("EXIT, rc=0\n"); return 0; fail: @@ -838,7 +831,6 @@ static int sata_pmp_eh_recover_pmp(struct ata_port *ap, else ata_dev_disable(dev); - DPRINTK("EXIT, rc=%d\n", rc); return rc; } diff --git a/drivers/ata/libata-sata.c b/drivers/ata/libata-sata.c index eddd33a3cb5f..d9b5744a3b06 100644 --- a/drivers/ata/libata-sata.c +++ b/drivers/ata/libata-sata.c @@ -533,8 +533,6 @@ int sata_link_hardreset(struct ata_link *link, const unsigned long *timing, u32 scontrol; int rc; - DPRINTK("ENTER\n"); - if (online) *online = false; @@ -610,7 +608,6 @@ int sata_link_hardreset(struct ata_link *link, const unsigned long *timing, *online = false; ata_link_err(link, "COMRESET failed (errno=%d)\n", rc); } - DPRINTK("EXIT, rc=%d\n", rc); return rc; } EXPORT_SYMBOL_GPL(sata_link_hardreset); diff --git a/include/trace/events/libata.h b/include/trace/events/libata.h index fcb8fde39614..d4e631aa976f 100644 --- a/include/trace/events/libata.h +++ b/include/trace/events/libata.h @@ -490,6 +490,37 @@ TRACE_EVENT(ata_eh_link_autopsy_qc, __parse_eh_err_mask(__entry->eh_err_mask)) ); +DECLARE_EVENT_CLASS(ata_eh_action_template, + + TP_PROTO(struct ata_link *link, unsigned int devno, unsigned int eh_action), + + TP_ARGS(link, devno, eh_action), + + TP_STRUCT__entry( + __field( unsigned int, ata_port ) + __field( unsigned int, ata_dev ) + __field( unsigned int, eh_action ) + ), + + TP_fast_assign( + __entry->ata_port = link->ap->print_id; + __entry->ata_dev = link->pmp + devno; + __entry->eh_action = eh_action; + ), + + TP_printk("ata_port=%u ata_dev=%u eh_action=%s", + __entry->ata_port, __entry->ata_dev, + __parse_eh_action(__entry->eh_action)) +); + +DEFINE_EVENT(ata_eh_action_template, ata_eh_about_to_do, + TP_PROTO(struct ata_link *link, unsigned int devno, unsigned int eh_action), + TP_ARGS(link, devno, eh_action)); + +DEFINE_EVENT(ata_eh_action_template, ata_eh_done, + TP_PROTO(struct ata_link *link, unsigned int devno, unsigned int eh_action), + TP_ARGS(link, devno, eh_action)); + DECLARE_EVENT_CLASS(ata_link_reset_begin_template, TP_PROTO(struct ata_link *link, unsigned int *class, unsigned long deadline), @@ -570,6 +601,35 @@ DEFINE_EVENT(ata_link_reset_end_template, ata_slave_postreset, TP_PROTO(struct ata_link *link, unsigned int *class, int rc), TP_ARGS(link, class, rc)); +DECLARE_EVENT_CLASS(ata_port_eh_begin_template, + + TP_PROTO(struct ata_port *ap), + + TP_ARGS(ap), + + TP_STRUCT__entry( + __field( unsigned int, ata_port ) + ), + + TP_fast_assign( + __entry->ata_port = ap->print_id; + ), + + TP_printk("ata_port=%u", __entry->ata_port) +); + +DEFINE_EVENT(ata_port_eh_begin_template, ata_std_sched_eh, + TP_PROTO(struct ata_port *ap), + TP_ARGS(ap)); + +DEFINE_EVENT(ata_port_eh_begin_template, ata_port_freeze, + TP_PROTO(struct ata_port *ap), + TP_ARGS(ap)); + +DEFINE_EVENT(ata_port_eh_begin_template, ata_port_thaw, + TP_PROTO(struct ata_port *ap), + TP_ARGS(ap)); + DECLARE_EVENT_CLASS(ata_sff_hsm_template, TP_PROTO(struct ata_queued_cmd *qc, unsigned char status), From 742bef476ca5352b16063161fb73a56629a6d995 Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Tue, 21 Dec 2021 08:20:35 +0100 Subject: [PATCH 235/493] ata: libata: move ata_{port,link,dev}_dbg to standard pr_XXX() macros Use standard pr_{debug,info,notice,warn,err} macros instead of the hand-crafted printk helpers. Signed-off-by: Hannes Reinecke Signed-off-by: Damien Le Moal --- drivers/ata/libata-acpi.c | 48 +++++++++++++------------ drivers/ata/libata-core.c | 61 ------------------------------- drivers/ata/pata_ixp4xx_cf.c | 6 ++-- include/linux/libata.h | 69 ++++++++++++++++++++---------------- 4 files changed, 67 insertions(+), 117 deletions(-) diff --git a/drivers/ata/libata-acpi.c b/drivers/ata/libata-acpi.c index 7a7d6642edcc..7007377880ce 100644 --- a/drivers/ata/libata-acpi.c +++ b/drivers/ata/libata-acpi.c @@ -650,9 +650,7 @@ static int ata_acpi_run_tf(struct ata_device *dev, struct ata_taskfile *pptf = NULL; struct ata_taskfile tf, ptf, rtf; unsigned int err_mask; - const char *level; const char *descr; - char msg[60]; int rc; if ((gtf->tf[0] == 0) && (gtf->tf[1] == 0) && (gtf->tf[2] == 0) @@ -666,6 +664,10 @@ static int ata_acpi_run_tf(struct ata_device *dev, pptf = &ptf; } + descr = ata_get_cmd_descript(tf.command); + if (!descr) + descr = "unknown"; + if (!ata_acpi_filter_tf(dev, &tf, pptf)) { rtf = tf; err_mask = ata_exec_internal(dev, &rtf, NULL, @@ -673,40 +675,42 @@ static int ata_acpi_run_tf(struct ata_device *dev, switch (err_mask) { case 0: - level = KERN_DEBUG; - snprintf(msg, sizeof(msg), "succeeded"); + ata_dev_dbg(dev, + "ACPI cmd %02x/%02x:%02x:%02x:%02x:%02x:%02x" + "(%s) succeeded\n", + tf.command, tf.feature, tf.nsect, tf.lbal, + tf.lbam, tf.lbah, tf.device, descr); rc = 1; break; case AC_ERR_DEV: - level = KERN_INFO; - snprintf(msg, sizeof(msg), - "rejected by device (Stat=0x%02x Err=0x%02x)", - rtf.command, rtf.feature); + ata_dev_info(dev, + "ACPI cmd %02x/%02x:%02x:%02x:%02x:%02x:%02x" + "(%s) rejected by device (Stat=0x%02x Err=0x%02x)", + tf.command, tf.feature, tf.nsect, tf.lbal, + tf.lbam, tf.lbah, tf.device, descr, + rtf.command, rtf.feature); rc = 0; break; default: - level = KERN_ERR; - snprintf(msg, sizeof(msg), - "failed (Emask=0x%x Stat=0x%02x Err=0x%02x)", - err_mask, rtf.command, rtf.feature); + ata_dev_err(dev, + "ACPI cmd %02x/%02x:%02x:%02x:%02x:%02x:%02x" + "(%s) failed (Emask=0x%x Stat=0x%02x Err=0x%02x)", + tf.command, tf.feature, tf.nsect, tf.lbal, + tf.lbam, tf.lbah, tf.device, descr, + err_mask, rtf.command, rtf.feature); rc = -EIO; break; } } else { - level = KERN_INFO; - snprintf(msg, sizeof(msg), "filtered out"); + ata_dev_info(dev, + "ACPI cmd %02x/%02x:%02x:%02x:%02x:%02x:%02x" + "(%s) filtered out\n", + tf.command, tf.feature, tf.nsect, tf.lbal, + tf.lbam, tf.lbah, tf.device, descr); rc = 0; } - descr = ata_get_cmd_descript(tf.command); - - ata_dev_printk(dev, level, - "ACPI cmd %02x/%02x:%02x:%02x:%02x:%02x:%02x (%s) %s\n", - tf.command, tf.feature, tf.nsect, tf.lbal, - tf.lbam, tf.lbah, tf.device, - (descr ? descr : "unknown"), msg); - return rc; } diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c index 3db4fd2029ce..d19984e5dfbc 100644 --- a/drivers/ata/libata-core.c +++ b/drivers/ata/libata-core.c @@ -6510,67 +6510,6 @@ const struct ata_port_info ata_dummy_port_info = { }; EXPORT_SYMBOL_GPL(ata_dummy_port_info); -/* - * Utility print functions - */ -void ata_port_printk(const struct ata_port *ap, const char *level, - const char *fmt, ...) -{ - struct va_format vaf; - va_list args; - - va_start(args, fmt); - - vaf.fmt = fmt; - vaf.va = &args; - - printk("%sata%u: %pV", level, ap->print_id, &vaf); - - va_end(args); -} -EXPORT_SYMBOL(ata_port_printk); - -void ata_link_printk(const struct ata_link *link, const char *level, - const char *fmt, ...) -{ - struct va_format vaf; - va_list args; - - va_start(args, fmt); - - vaf.fmt = fmt; - vaf.va = &args; - - if (sata_pmp_attached(link->ap) || link->ap->slave_link) - printk("%sata%u.%02u: %pV", - level, link->ap->print_id, link->pmp, &vaf); - else - printk("%sata%u: %pV", - level, link->ap->print_id, &vaf); - - va_end(args); -} -EXPORT_SYMBOL(ata_link_printk); - -void ata_dev_printk(const struct ata_device *dev, const char *level, - const char *fmt, ...) -{ - struct va_format vaf; - va_list args; - - va_start(args, fmt); - - vaf.fmt = fmt; - vaf.va = &args; - - printk("%sata%u.%02u: %pV", - level, dev->link->ap->print_id, dev->link->pmp + dev->devno, - &vaf); - - va_end(args); -} -EXPORT_SYMBOL(ata_dev_printk); - void ata_print_version(const struct device *dev, const char *version) { dev_printk(KERN_DEBUG, dev, "version %s\n", version); diff --git a/drivers/ata/pata_ixp4xx_cf.c b/drivers/ata/pata_ixp4xx_cf.c index 99c63087c8ae..17b557c91e1c 100644 --- a/drivers/ata/pata_ixp4xx_cf.c +++ b/drivers/ata/pata_ixp4xx_cf.c @@ -114,7 +114,7 @@ static void ixp4xx_set_piomode(struct ata_port *ap, struct ata_device *adev) { struct ixp4xx_pata *ixpp = ap->host->private_data; - ata_dev_printk(adev, KERN_INFO, "configured for PIO%d 8bit\n", + ata_dev_info(adev, "configured for PIO%d 8bit\n", adev->pio_mode - XFER_PIO_0); ixp4xx_set_8bit_timing(ixpp, adev->pio_mode); } @@ -132,8 +132,8 @@ static unsigned int ixp4xx_mmio_data_xfer(struct ata_queued_cmd *qc, struct ixp4xx_pata *ixpp = ap->host->private_data; unsigned long flags; - ata_dev_printk(adev, KERN_DEBUG, "%s %d bytes\n", (rw == READ) ? "READ" : "WRITE", - buflen); + ata_dev_dbg(adev, "%s %d bytes\n", (rw == READ) ? "READ" : "WRITE", + buflen); spin_lock_irqsave(ap->lock, flags); /* set the expansion bus in 16bit mode and restore diff --git a/include/linux/libata.h b/include/linux/libata.h index 235fdbeb19ea..39cdde0b9491 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -1489,51 +1489,61 @@ static inline int sata_srst_pmp(struct ata_link *link) return link->pmp; } -/* - * printk helpers - */ -__printf(3, 4) -void ata_port_printk(const struct ata_port *ap, const char *level, - const char *fmt, ...); -__printf(3, 4) -void ata_link_printk(const struct ata_link *link, const char *level, - const char *fmt, ...); -__printf(3, 4) -void ata_dev_printk(const struct ata_device *dev, const char *level, - const char *fmt, ...); +#define ata_port_printk(level, ap, fmt, ...) \ + pr_ ## level ("ata%u: " fmt, (ap)->print_id, ##__VA_ARGS__) #define ata_port_err(ap, fmt, ...) \ - ata_port_printk(ap, KERN_ERR, fmt, ##__VA_ARGS__) + ata_port_printk(err, ap, fmt, ##__VA_ARGS__) #define ata_port_warn(ap, fmt, ...) \ - ata_port_printk(ap, KERN_WARNING, fmt, ##__VA_ARGS__) + ata_port_printk(warn, ap, fmt, ##__VA_ARGS__) #define ata_port_notice(ap, fmt, ...) \ - ata_port_printk(ap, KERN_NOTICE, fmt, ##__VA_ARGS__) + ata_port_printk(notice, ap, fmt, ##__VA_ARGS__) #define ata_port_info(ap, fmt, ...) \ - ata_port_printk(ap, KERN_INFO, fmt, ##__VA_ARGS__) + ata_port_printk(info, ap, fmt, ##__VA_ARGS__) #define ata_port_dbg(ap, fmt, ...) \ - ata_port_printk(ap, KERN_DEBUG, fmt, ##__VA_ARGS__) + ata_port_printk(debug, ap, fmt, ##__VA_ARGS__) + +#define ata_link_printk(level, link, fmt, ...) \ +do { \ + if (sata_pmp_attached((link)->ap) || \ + (link)->ap->slave_link) \ + pr_ ## level ("ata%u.%02u: " fmt, \ + (link)->ap->print_id, \ + (link)->pmp, \ + ##__VA_ARGS__); \ + else \ + pr_ ## level ("ata%u: " fmt, \ + (link)->ap->print_id, \ + ##__VA_ARGS__); \ +} while (0) #define ata_link_err(link, fmt, ...) \ - ata_link_printk(link, KERN_ERR, fmt, ##__VA_ARGS__) + ata_link_printk(err, link, fmt, ##__VA_ARGS__) #define ata_link_warn(link, fmt, ...) \ - ata_link_printk(link, KERN_WARNING, fmt, ##__VA_ARGS__) + ata_link_printk(warn, link, fmt, ##__VA_ARGS__) #define ata_link_notice(link, fmt, ...) \ - ata_link_printk(link, KERN_NOTICE, fmt, ##__VA_ARGS__) + ata_link_printk(notice, link, fmt, ##__VA_ARGS__) #define ata_link_info(link, fmt, ...) \ - ata_link_printk(link, KERN_INFO, fmt, ##__VA_ARGS__) + ata_link_printk(info, link, fmt, ##__VA_ARGS__) #define ata_link_dbg(link, fmt, ...) \ - ata_link_printk(link, KERN_DEBUG, fmt, ##__VA_ARGS__) + ata_link_printk(debug, link, fmt, ##__VA_ARGS__) + +#define ata_dev_printk(level, dev, fmt, ...) \ + pr_ ## level("ata%u.%02u: " fmt, \ + (dev)->link->ap->print_id, \ + (dev)->link->pmp + (dev)->devno, \ + ##__VA_ARGS__) #define ata_dev_err(dev, fmt, ...) \ - ata_dev_printk(dev, KERN_ERR, fmt, ##__VA_ARGS__) + ata_dev_printk(err, dev, fmt, ##__VA_ARGS__) #define ata_dev_warn(dev, fmt, ...) \ - ata_dev_printk(dev, KERN_WARNING, fmt, ##__VA_ARGS__) + ata_dev_printk(warn, dev, fmt, ##__VA_ARGS__) #define ata_dev_notice(dev, fmt, ...) \ - ata_dev_printk(dev, KERN_NOTICE, fmt, ##__VA_ARGS__) + ata_dev_printk(notice, dev, fmt, ##__VA_ARGS__) #define ata_dev_info(dev, fmt, ...) \ - ata_dev_printk(dev, KERN_INFO, fmt, ##__VA_ARGS__) + ata_dev_printk(info, dev, fmt, ##__VA_ARGS__) #define ata_dev_dbg(dev, fmt, ...) \ - ata_dev_printk(dev, KERN_DEBUG, fmt, ##__VA_ARGS__) + ata_dev_printk(debug, dev, fmt, ##__VA_ARGS__) void ata_print_version(const struct device *dev, const char *version); @@ -2067,11 +2077,8 @@ static inline u8 ata_wait_idle(struct ata_port *ap) { u8 status = ata_sff_busy_wait(ap, ATA_BUSY | ATA_DRQ, 1000); -#ifdef ATA_DEBUG if (status != 0xff && (status & (ATA_BUSY | ATA_DRQ))) - ata_port_printk(ap, KERN_DEBUG, "abnormal Status 0x%X\n", - status); -#endif + ata_port_dbg(ap, "abnormal Status 0x%X\n", status); return status; } From d452090301fa19e99a1a1422f70cd7b1092a0f9b Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Tue, 21 Dec 2021 08:20:36 +0100 Subject: [PATCH 236/493] ata: libata: revamp ata_get_cmd_descript() Rename ata_get_cmd_descrip() to ata_get_cmd_name() and simplify it to return "unknown" instead of NULL. Signed-off-by: Hannes Reinecke Signed-off-by: Damien Le Moal --- drivers/ata/libata-acpi.c | 4 +--- drivers/ata/libata-eh.c | 22 +++++++++------------- drivers/ata/libata.h | 2 +- 3 files changed, 11 insertions(+), 17 deletions(-) diff --git a/drivers/ata/libata-acpi.c b/drivers/ata/libata-acpi.c index 7007377880ce..9e1e62b9cf63 100644 --- a/drivers/ata/libata-acpi.c +++ b/drivers/ata/libata-acpi.c @@ -664,9 +664,7 @@ static int ata_acpi_run_tf(struct ata_device *dev, pptf = &ptf; } - descr = ata_get_cmd_descript(tf.command); - if (!descr) - descr = "unknown"; + descr = ata_get_cmd_name(tf.command); if (!ata_acpi_filter_tf(dev, &tf, pptf)) { rtf = tf; diff --git a/drivers/ata/libata-eh.c b/drivers/ata/libata-eh.c index 69f51616d8bd..8bf52a6239aa 100644 --- a/drivers/ata/libata-eh.c +++ b/drivers/ata/libata-eh.c @@ -2080,16 +2080,15 @@ void ata_eh_autopsy(struct ata_port *ap) } /** - * ata_get_cmd_descript - get description for ATA command - * @command: ATA command code to get description for + * ata_get_cmd_name - get name for ATA command + * @command: ATA command code to get name for * - * Return a textual description of the given command, or NULL if the - * command is not known. + * Return a textual name of the given command or "unknown" * * LOCKING: * None */ -const char *ata_get_cmd_descript(u8 command) +const char *ata_get_cmd_name(u8 command) { #ifdef CONFIG_ATA_VERBOSE_ERROR static const struct @@ -2197,9 +2196,9 @@ const char *ata_get_cmd_descript(u8 command) return cmd_descr[i].text; #endif - return NULL; + return "unknown"; } -EXPORT_SYMBOL_GPL(ata_get_cmd_descript); +EXPORT_SYMBOL_GPL(ata_get_cmd_name); /** * ata_eh_link_report - report error handling to user @@ -2348,12 +2347,9 @@ static void ata_eh_link_report(struct ata_link *link) } __scsi_format_command(cdb_buf, sizeof(cdb_buf), cdb, cdb_len); - } else { - const char *descr = ata_get_cmd_descript(cmd->command); - if (descr) - ata_dev_err(qc->dev, "failed command: %s\n", - descr); - } + } else + ata_dev_err(qc->dev, "failed command: %s\n", + ata_get_cmd_name(cmd->command)); ata_dev_err(qc->dev, "cmd %02x/%02x:%02x:%02x:%02x:%02x/%02x:%02x:%02x:%02x:%02x/%02x " diff --git a/drivers/ata/libata.h b/drivers/ata/libata.h index 4a8f4623cfe5..2144065e762c 100644 --- a/drivers/ata/libata.h +++ b/drivers/ata/libata.h @@ -166,7 +166,7 @@ extern void ata_eh_about_to_do(struct ata_link *link, struct ata_device *dev, extern void ata_eh_done(struct ata_link *link, struct ata_device *dev, unsigned int action); extern void ata_eh_autopsy(struct ata_port *ap); -const char *ata_get_cmd_descript(u8 command); +const char *ata_get_cmd_name(u8 command); extern void ata_eh_report(struct ata_port *ap); extern int ata_eh_reset(struct ata_link *link, int classify, ata_prereset_fn_t prereset, ata_reset_fn_t softreset, From 4633778b254d6183eb1dd6b538b8e04583167f51 Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Tue, 21 Dec 2021 08:20:37 +0100 Subject: [PATCH 237/493] ata: libata: move DPRINTK to ata debugging Replace all DPRINTK calls with ata_dev_dbg(). Signed-off-by: Hannes Reinecke Signed-off-by: Damien Le Moal --- drivers/ata/libata-core.c | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c index d19984e5dfbc..c9552606b015 100644 --- a/drivers/ata/libata-core.c +++ b/drivers/ata/libata-core.c @@ -1965,7 +1965,7 @@ unsigned int ata_read_log_page(struct ata_device *dev, u8 log, unsigned int err_mask; bool dma = false; - DPRINTK("read log page - log 0x%x, page 0x%x\n", log, page); + ata_dev_dbg(dev, "read log page - log 0x%x, page 0x%x\n", log, page); /* * Return error without actually issuing the command on controllers @@ -3341,8 +3341,8 @@ static int ata_dev_set_mode(struct ata_device *dev) dev_err_whine = " (device error ignored)"; } - DPRINTK("xfer_shift=%u, xfer_mode=0x%x\n", - dev->xfer_shift, (int)dev->xfer_mode); + ata_dev_dbg(dev, "xfer_shift=%u, xfer_mode=0x%x\n", + dev->xfer_shift, (int)dev->xfer_mode); if (!(ehc->i.flags & ATA_EHI_QUIET) || ehc->i.flags & ATA_EHI_DID_HARDRESET) @@ -4288,7 +4288,7 @@ static unsigned int ata_dev_set_xfermode(struct ata_device *dev) unsigned int err_mask; /* set up set-features taskfile */ - DPRINTK("set features - xfer mode\n"); + ata_dev_dbg(dev, "set features - xfer mode\n"); /* Some controllers and ATAPI devices show flaky interrupt * behavior after setting xfer mode. Use polling instead. @@ -4310,7 +4310,6 @@ static unsigned int ata_dev_set_xfermode(struct ata_device *dev) /* On some disks, this command causes spin-up, so we need longer timeout */ err_mask = ata_exec_internal(dev, &tf, NULL, DMA_NONE, NULL, 0, 15000); - DPRINTK("EXIT, err_mask=%x\n", err_mask); return err_mask; } @@ -4336,7 +4335,7 @@ unsigned int ata_dev_set_feature(struct ata_device *dev, u8 enable, u8 feature) unsigned long timeout = 0; /* set up set-features taskfile */ - DPRINTK("set features - SATA features\n"); + ata_dev_dbg(dev, "set features - SATA features\n"); ata_tf_init(dev, &tf); tf.command = ATA_CMD_SET_FEATURES; @@ -4350,7 +4349,6 @@ unsigned int ata_dev_set_feature(struct ata_device *dev, u8 enable, u8 feature) ata_probe_timeout * 1000 : SETFEATURES_SPINUP_TIMEOUT; err_mask = ata_exec_internal(dev, &tf, NULL, DMA_NONE, NULL, 0, timeout); - DPRINTK("EXIT, err_mask=%x\n", err_mask); return err_mask; } EXPORT_SYMBOL_GPL(ata_dev_set_feature); @@ -4378,7 +4376,7 @@ static unsigned int ata_dev_init_params(struct ata_device *dev, return AC_ERR_INVALID; /* set up init dev params taskfile */ - DPRINTK("init dev params \n"); + ata_dev_dbg(dev, "init dev params \n"); ata_tf_init(dev, &tf); tf.command = ATA_CMD_INIT_DEV_PARAMS; @@ -4394,7 +4392,6 @@ static unsigned int ata_dev_init_params(struct ata_device *dev, if (err_mask == AC_ERR_DEV && (tf.feature & ATA_ABORTED)) err_mask = 0; - DPRINTK("EXIT, err_mask=%x\n", err_mask); return err_mask; } From 37fcfade40f77679f9a3942cbe630f4f4be11452 Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Tue, 21 Dec 2021 08:20:38 +0100 Subject: [PATCH 238/493] ata: sata_mv: kill 'port' argument in mv_dump_all_regs() Always '-1', so drop it and simplify the function. Signed-off-by: Hannes Reinecke Signed-off-by: Damien Le Moal --- drivers/ata/sata_mv.c | 19 ++++++------------- 1 file changed, 6 insertions(+), 13 deletions(-) diff --git a/drivers/ata/sata_mv.c b/drivers/ata/sata_mv.c index cae4c1eab102..c5b3d45a7c39 100644 --- a/drivers/ata/sata_mv.c +++ b/drivers/ata/sata_mv.c @@ -1280,24 +1280,17 @@ static void mv_dump_pci_cfg(struct pci_dev *pdev, unsigned bytes) #endif } #endif -static void mv_dump_all_regs(void __iomem *mmio_base, int port, +static void mv_dump_all_regs(void __iomem *mmio_base, struct pci_dev *pdev) { #ifdef ATA_DEBUG - void __iomem *hc_base = mv_hc_base(mmio_base, - port >> MV_PORT_HC_SHIFT); + void __iomem *hc_base; void __iomem *port_base; int start_port, num_ports, p, start_hc, num_hcs, hc; - if (0 > port) { - start_hc = start_port = 0; - num_ports = 8; /* shld be benign for 4 port devs */ - num_hcs = 2; - } else { - start_hc = port >> MV_PORT_HC_SHIFT; - start_port = port; - num_ports = num_hcs = 1; - } + start_hc = start_port = 0; + num_ports = 8; /* should be benign for 4 port devs */ + num_hcs = 2; DPRINTK("All registers for port(s) %u-%u:\n", start_port, num_ports > 1 ? num_ports - 1 : start_port); @@ -2963,7 +2956,7 @@ static int mv_pci_error(struct ata_host *host, void __iomem *mmio) dev_err(host->dev, "PCI ERROR; PCI IRQ cause=0x%08x\n", err_cause); DPRINTK("All regs @ PCI error\n"); - mv_dump_all_regs(mmio, -1, to_pci_dev(host->dev)); + mv_dump_all_regs(mmio, to_pci_dev(host->dev)); writelfl(0, mmio + hpriv->irq_cause_offset); From a2715a42380bed98be4797287f97c07a388d5695 Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Tue, 21 Dec 2021 08:20:39 +0100 Subject: [PATCH 239/493] ata: sata_mv: replace DPRINTK with dynamic debugging Move the DPRINTK calls over to dynamic debugging. Signed-off-by: Hannes Reinecke Signed-off-by: Damien Le Moal --- drivers/ata/sata_mv.c | 76 +++++++++++++++++++++---------------------- 1 file changed, 38 insertions(+), 38 deletions(-) diff --git a/drivers/ata/sata_mv.c b/drivers/ata/sata_mv.c index c5b3d45a7c39..f00540641027 100644 --- a/drivers/ata/sata_mv.c +++ b/drivers/ata/sata_mv.c @@ -1248,42 +1248,43 @@ static int mv_stop_edma(struct ata_port *ap) return err; } -#ifdef ATA_DEBUG -static void mv_dump_mem(void __iomem *start, unsigned bytes) +static void mv_dump_mem(struct device *dev, void __iomem *start, unsigned bytes) { - int b, w; + int b, w, o; + unsigned char linebuf[38]; + for (b = 0; b < bytes; ) { - DPRINTK("%p: ", start + b); - for (w = 0; b < bytes && w < 4; w++) { - printk("%08x ", readl(start + b)); + for (w = 0, o = 0; b < bytes && w < 4; w++) { + o += snprintf(linebuf + o, sizeof(linebuf) - o, + "%08x ", readl(start + b)); b += sizeof(u32); } - printk("\n"); + dev_dbg(dev, "%s: %p: %s\n", + __func__, start + b, linebuf); } } -#endif -#if defined(ATA_DEBUG) || defined(CONFIG_PCI) + static void mv_dump_pci_cfg(struct pci_dev *pdev, unsigned bytes) { -#ifdef ATA_DEBUG - int b, w; - u32 dw; + int b, w, o; + u32 dw = 0; + unsigned char linebuf[38]; + for (b = 0; b < bytes; ) { - DPRINTK("%02x: ", b); - for (w = 0; b < bytes && w < 4; w++) { + for (w = 0, o = 0; b < bytes && w < 4; w++) { (void) pci_read_config_dword(pdev, b, &dw); - printk("%08x ", dw); + o += snprintf(linebuf + o, sizeof(linebuf) - o, + "%08x ", dw); b += sizeof(u32); } - printk("\n"); + dev_dbg(&pdev->dev, "%s: %02x: %s\n", + __func__, b, linebuf); } -#endif } -#endif + static void mv_dump_all_regs(void __iomem *mmio_base, struct pci_dev *pdev) { -#ifdef ATA_DEBUG void __iomem *hc_base; void __iomem *port_base; int start_port, num_ports, p, start_hc, num_hcs, hc; @@ -1291,31 +1292,30 @@ static void mv_dump_all_regs(void __iomem *mmio_base, start_hc = start_port = 0; num_ports = 8; /* should be benign for 4 port devs */ num_hcs = 2; - DPRINTK("All registers for port(s) %u-%u:\n", start_port, - num_ports > 1 ? num_ports - 1 : start_port); + dev_dbg(&pdev->dev, + "%s: All registers for port(s) %u-%u:\n", __func__, + start_port, num_ports > 1 ? num_ports - 1 : start_port); - if (NULL != pdev) { - DPRINTK("PCI config space regs:\n"); - mv_dump_pci_cfg(pdev, 0x68); - } - DPRINTK("PCI regs:\n"); - mv_dump_mem(mmio_base+0xc00, 0x3c); - mv_dump_mem(mmio_base+0xd00, 0x34); - mv_dump_mem(mmio_base+0xf00, 0x4); - mv_dump_mem(mmio_base+0x1d00, 0x6c); + dev_dbg(&pdev->dev, "%s: PCI config space regs:\n", __func__); + mv_dump_pci_cfg(pdev, 0x68); + + dev_dbg(&pdev->dev, "%s: PCI regs:\n", __func__); + mv_dump_mem(&pdev->dev, mmio_base+0xc00, 0x3c); + mv_dump_mem(&pdev->dev, mmio_base+0xd00, 0x34); + mv_dump_mem(&pdev->dev, mmio_base+0xf00, 0x4); + mv_dump_mem(&pdev->dev, mmio_base+0x1d00, 0x6c); for (hc = start_hc; hc < start_hc + num_hcs; hc++) { hc_base = mv_hc_base(mmio_base, hc); - DPRINTK("HC regs (HC %i):\n", hc); - mv_dump_mem(hc_base, 0x1c); + dev_dbg(&pdev->dev, "%s: HC regs (HC %i):\n", __func__, hc); + mv_dump_mem(&pdev->dev, hc_base, 0x1c); } for (p = start_port; p < start_port + num_ports; p++) { port_base = mv_port_base(mmio_base, p); - DPRINTK("EDMA regs (port %i):\n", p); - mv_dump_mem(port_base, 0x54); - DPRINTK("SATA regs (port %i):\n", p); - mv_dump_mem(port_base+0x300, 0x60); + dev_dbg(&pdev->dev, "%s: EDMA regs (port %i):\n", __func__, p); + mv_dump_mem(&pdev->dev, port_base, 0x54); + dev_dbg(&pdev->dev, "%s: SATA regs (port %i):\n", __func__, p); + mv_dump_mem(&pdev->dev, port_base+0x300, 0x60); } -#endif } static unsigned int mv_scr_offset(unsigned int sc_reg_in) @@ -2955,7 +2955,7 @@ static int mv_pci_error(struct ata_host *host, void __iomem *mmio) dev_err(host->dev, "PCI ERROR; PCI IRQ cause=0x%08x\n", err_cause); - DPRINTK("All regs @ PCI error\n"); + dev_dbg(host->dev, "%s: All regs @ PCI error\n", __func__); mv_dump_all_regs(mmio, to_pci_dev(host->dev)); writelfl(0, mmio + hpriv->irq_cause_offset); From e392e3944f8b1c2075f8e361a2255ca9037e3fc8 Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Tue, 21 Dec 2021 08:20:40 +0100 Subject: [PATCH 240/493] ata: pata_octeon_cf: remove DPRINTK() macro in interrupt context There is only so much information to be glanced when the interrupt routine is called and exited, so remove these DPRINTK() calls. Signed-off-by: Hannes Reinecke Signed-off-by: Damien Le Moal --- drivers/ata/pata_octeon_cf.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/ata/pata_octeon_cf.c b/drivers/ata/pata_octeon_cf.c index 1fe756af3268..07eda263b4c1 100644 --- a/drivers/ata/pata_octeon_cf.c +++ b/drivers/ata/pata_octeon_cf.c @@ -668,7 +668,6 @@ static irqreturn_t octeon_cf_interrupt(int irq, void *dev_instance) spin_lock_irqsave(&host->lock, flags); - DPRINTK("ENTER\n"); for (i = 0; i < host->n_ports; i++) { u8 status; struct ata_port *ap; @@ -723,7 +722,6 @@ static irqreturn_t octeon_cf_interrupt(int irq, void *dev_instance) } } spin_unlock_irqrestore(&host->lock, flags); - DPRINTK("EXIT\n"); return IRQ_RETVAL(handled); } From 774f6bac2ed39ab1d1a7c41cc986279866486c07 Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Tue, 21 Dec 2021 08:20:41 +0100 Subject: [PATCH 241/493] ata: pdc_adma: Remove DPRINTK call The DPRINTK call doesn't print information which isn't already covered by tracepoints later on. Remove it. Signed-off-by: Hannes Reinecke Signed-off-by: Damien Le Moal --- drivers/ata/pdc_adma.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/ata/pdc_adma.c b/drivers/ata/pdc_adma.c index 5db55e1e2a61..2c910c4cd4de 100644 --- a/drivers/ata/pdc_adma.c +++ b/drivers/ata/pdc_adma.c @@ -475,8 +475,6 @@ static inline unsigned int adma_intr_mmio(struct ata_host *host) u8 status = ata_sff_check_status(ap); if ((status & ATA_BUSY)) continue; - DPRINTK("ata%u: protocol %d (dev_stat 0x%X)\n", - ap->print_id, qc->tf.protocol, status); /* complete taskfile transaction */ pp->state = adma_state_idle; From 65945144fa849d87b23cac4fbc8565807eefee02 Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Tue, 21 Dec 2021 08:20:42 +0100 Subject: [PATCH 242/493] ata: sata_fsl: move DPRINTK to ata debugging Replace all DPRINTK calls with the ata_XXX_dbg functions. Signed-off-by: Hannes Reinecke Signed-off-by: Damien Le Moal --- drivers/ata/sata_fsl.c | 83 ++++++++++++++++++++---------------------- 1 file changed, 39 insertions(+), 44 deletions(-) diff --git a/drivers/ata/sata_fsl.c b/drivers/ata/sata_fsl.c index 8aace70e8826..3afd727f1a4f 100644 --- a/drivers/ata/sata_fsl.c +++ b/drivers/ata/sata_fsl.c @@ -313,10 +313,10 @@ static void fsl_sata_set_irq_coalescing(struct ata_host *host, intr_coalescing_ticks = ticks; spin_unlock_irqrestore(&host->lock, flags); - DPRINTK("interrupt coalescing, count = 0x%x, ticks = %x\n", - intr_coalescing_count, intr_coalescing_ticks); - DPRINTK("ICC register status: (hcr base: %p) = 0x%x\n", - hcr_base, ioread32(hcr_base + ICC)); + dev_dbg(host->dev, "interrupt coalescing, count = 0x%x, ticks = %x\n", + intr_coalescing_count, intr_coalescing_ticks); + dev_dbg(host->dev, "ICC register status: (hcr base: 0x%p) = 0x%x\n", + hcr_base, ioread32(hcr_base + ICC)); } static ssize_t fsl_sata_intr_coalescing_show(struct device *dev, @@ -387,18 +387,19 @@ static ssize_t fsl_sata_rx_watermark_store(struct device *dev, return strlen(buf); } -static inline unsigned int sata_fsl_tag(unsigned int tag, +static inline unsigned int sata_fsl_tag(struct ata_port *ap, + unsigned int tag, void __iomem *hcr_base) { /* We let libATA core do actual (queue) tag allocation */ if (unlikely(tag >= SATA_FSL_QUEUE_DEPTH)) { - DPRINTK("tag %d invalid : out of range\n", tag); + ata_port_dbg(ap, "tag %d invalid : out of range\n", tag); return 0; } if (unlikely((ioread32(hcr_base + CQ)) & (1 << tag))) { - DPRINTK("tag %d invalid : in use!!\n", tag); + ata_port_dbg(ap, "tag %d invalid : in use!!\n", tag); return 0; } @@ -510,7 +511,7 @@ static enum ata_completion_errors sata_fsl_qc_prep(struct ata_queued_cmd *qc) struct sata_fsl_port_priv *pp = ap->private_data; struct sata_fsl_host_priv *host_priv = ap->host->private_data; void __iomem *hcr_base = host_priv->hcr_base; - unsigned int tag = sata_fsl_tag(qc->hw_tag, hcr_base); + unsigned int tag = sata_fsl_tag(ap, qc->hw_tag, hcr_base); struct command_desc *cd; u32 desc_info = CMD_DESC_RES | CMD_DESC_SNOOP_ENABLE; u32 num_prde = 0; @@ -559,7 +560,7 @@ static unsigned int sata_fsl_qc_issue(struct ata_queued_cmd *qc) struct ata_port *ap = qc->ap; struct sata_fsl_host_priv *host_priv = ap->host->private_data; void __iomem *hcr_base = host_priv->hcr_base; - unsigned int tag = sata_fsl_tag(qc->hw_tag, hcr_base); + unsigned int tag = sata_fsl_tag(ap, qc->hw_tag, hcr_base); VPRINTK("xx_qc_issue called,CQ=0x%x,CA=0x%x,CE=0x%x,CC=0x%x\n", ioread32(CQ + hcr_base), @@ -588,7 +589,7 @@ static bool sata_fsl_qc_fill_rtf(struct ata_queued_cmd *qc) struct sata_fsl_port_priv *pp = qc->ap->private_data; struct sata_fsl_host_priv *host_priv = qc->ap->host->private_data; void __iomem *hcr_base = host_priv->hcr_base; - unsigned int tag = sata_fsl_tag(qc->hw_tag, hcr_base); + unsigned int tag = sata_fsl_tag(qc->ap, qc->hw_tag, hcr_base); struct command_desc *cd; cd = pp->cmdentry + tag; @@ -852,9 +853,10 @@ try_offline_again: goto try_offline_again; } - DPRINTK("hardreset, controller off-lined\n"); - VPRINTK("HStatus = 0x%x\n", ioread32(hcr_base + HSTATUS)); - VPRINTK("HControl = 0x%x\n", ioread32(hcr_base + HCONTROL)); + ata_port_dbg(ap, "hardreset, controller off-lined\n" + "HStatus = 0x%x HControl = 0x%x\n", + ioread32(hcr_base + HSTATUS), + ioread32(hcr_base + HCONTROL)); /* * PHY reset should remain asserted for atleast 1ms @@ -882,9 +884,10 @@ try_offline_again: goto err; } - DPRINTK("hardreset, controller off-lined & on-lined\n"); - VPRINTK("HStatus = 0x%x\n", ioread32(hcr_base + HSTATUS)); - VPRINTK("HControl = 0x%x\n", ioread32(hcr_base + HCONTROL)); + ata_port_dbg(ap, "controller off-lined & on-lined\n" + "HStatus = 0x%x HControl = 0x%x\n", + ioread32(hcr_base + HSTATUS), + ioread32(hcr_base + HCONTROL)); /* * First, wait for the PHYRDY change to occur before waiting for @@ -964,7 +967,7 @@ static int sata_fsl_softreset(struct ata_link *link, unsigned int *class, tf.ctl |= ATA_SRST; /* setup SRST bit in taskfile control reg */ ata_tf_to_fis(&tf, pmp, 0, cfis); - DPRINTK("Dumping cfis : 0x%x, 0x%x, 0x%x, 0x%x\n", + ata_port_dbg(ap, "Dumping cfis : 0x%x, 0x%x, 0x%x, 0x%x\n", cfis[0], cfis[1], cfis[2], cfis[3]); /* @@ -972,7 +975,7 @@ static int sata_fsl_softreset(struct ata_link *link, unsigned int *class, * other commands are active on the controller/device */ - DPRINTK("@Softreset, CQ = 0x%x, CA = 0x%x, CC = 0x%x\n", + ata_port_dbg(ap, "CQ = 0x%x, CA = 0x%x, CC = 0x%x\n", ioread32(CQ + hcr_base), ioread32(CA + hcr_base), ioread32(CC + hcr_base)); @@ -985,15 +988,16 @@ static int sata_fsl_softreset(struct ata_link *link, unsigned int *class, if (temp & 0x1) { ata_port_warn(ap, "ATA_SRST issue failed\n"); - DPRINTK("Softreset@5000,CQ=0x%x,CA=0x%x,CC=0x%x\n", + ata_port_dbg(ap, "Softreset@5000,CQ=0x%x,CA=0x%x,CC=0x%x\n", ioread32(CQ + hcr_base), ioread32(CA + hcr_base), ioread32(CC + hcr_base)); sata_fsl_scr_read(&ap->link, SCR_ERROR, &Serror); - DPRINTK("HStatus = 0x%x\n", ioread32(hcr_base + HSTATUS)); - DPRINTK("HControl = 0x%x\n", ioread32(hcr_base + HCONTROL)); - DPRINTK("Serror = 0x%x\n", Serror); + ata_port_dbg(ap, "HStatus = 0x%x HControl = 0x%x Serror = 0x%x\n", + ioread32(hcr_base + HSTATUS), + ioread32(hcr_base + HCONTROL), + Serror); goto err; } @@ -1050,10 +1054,7 @@ err: static void sata_fsl_error_handler(struct ata_port *ap) { - - DPRINTK("in xx_error_handler\n"); sata_pmp_error_handler(ap); - } static void sata_fsl_post_internal_cmd(struct ata_queued_cmd *qc) @@ -1094,7 +1095,7 @@ static void sata_fsl_error_intr(struct ata_port *ap) if (unlikely(SError & 0xFFFF0000)) sata_fsl_scr_write(&ap->link, SCR_ERROR, SError); - DPRINTK("error_intr,hStat=0x%x,CE=0x%x,DE =0x%x,SErr=0x%x\n", + ata_port_dbg(ap, "hStat=0x%x,CE=0x%x,DE =0x%x,SErr=0x%x\n", hstatus, cereg, ioread32(hcr_base + DE), SError); /* handle fatal errors */ @@ -1111,7 +1112,7 @@ static void sata_fsl_error_intr(struct ata_port *ap) /* Handle PHYRDY change notification */ if (hstatus & INT_ON_PHYRDY_CHG) { - DPRINTK("SATA FSL: PHYRDY change indication\n"); + ata_port_dbg(ap, "PHYRDY change indication\n"); /* Setup a soft-reset EH action */ ata_ehi_hotplugged(ehi); @@ -1132,7 +1133,7 @@ static void sata_fsl_error_intr(struct ata_port *ap) */ abort = 1; - DPRINTK("single device error, CE=0x%x, DE=0x%x\n", + ata_port_dbg(ap, "single device error, CE=0x%x, DE=0x%x\n", ioread32(hcr_base + CE), ioread32(hcr_base + DE)); /* find out the offending link and qc */ @@ -1237,12 +1238,12 @@ static void sata_fsl_host_intr(struct ata_port *ap) } if (unlikely(SError & 0xFFFF0000)) { - DPRINTK("serror @host_intr : 0x%x\n", SError); + ata_port_dbg(ap, "serror @host_intr : 0x%x\n", SError); sata_fsl_error_intr(ap); } if (unlikely(hstatus & status_mask)) { - DPRINTK("error interrupt!!\n"); + ata_port_dbg(ap, "error interrupt!!\n"); sata_fsl_error_intr(ap); return; } @@ -1260,15 +1261,13 @@ static void sata_fsl_host_intr(struct ata_port *ap) /* clear CC bit, this will also complete the interrupt */ iowrite32(done_mask, hcr_base + CC); - DPRINTK("Status of all queues :\n"); - DPRINTK("done_mask/CC = 0x%x, CA = 0x%x, CE=0x%x\n", + ata_port_dbg(ap, "Status of all queues: done_mask/CC = 0x%x, CA = 0x%x, CE=0x%x\n", done_mask, ioread32(hcr_base + CA), ioread32(hcr_base + CE)); for (i = 0; i < SATA_FSL_QUEUE_DEPTH; i++) { if (done_mask & (1 << i)) - DPRINTK - ("completing ncq cmd,tag=%d,CC=0x%x,CA=0x%x\n", + ata_port_dbg(ap, "completing ncq cmd,tag=%d,CC=0x%x,CA=0x%x\n", i, ioread32(hcr_base + CC), ioread32(hcr_base + CA)); } @@ -1279,7 +1278,7 @@ static void sata_fsl_host_intr(struct ata_port *ap) iowrite32(1, hcr_base + CC); qc = ata_qc_from_tag(ap, ATA_TAG_INTERNAL); - DPRINTK("completing non-ncq cmd, CC=0x%x\n", + ata_port_dbg(ap, "completing non-ncq cmd, CC=0x%x\n", ioread32(hcr_base + CC)); if (qc) { @@ -1287,7 +1286,7 @@ static void sata_fsl_host_intr(struct ata_port *ap) } } else { /* Spurious Interrupt!! */ - DPRINTK("spurious interrupt!!, CC = 0x%x\n", + ata_port_dbg(ap, "spurious interrupt!!, CC = 0x%x\n", ioread32(hcr_base + CC)); iowrite32(done_mask, hcr_base + CC); return; @@ -1307,8 +1306,6 @@ static irqreturn_t sata_fsl_interrupt(int irq, void *dev_instance) interrupt_enables = ioread32(hcr_base + HSTATUS); interrupt_enables &= 0x3F; - DPRINTK("interrupt status 0x%x\n", interrupt_enables); - if (!interrupt_enables) return IRQ_NONE; @@ -1361,7 +1358,7 @@ static int sata_fsl_init_controller(struct ata_host *host) iowrite32((temp & ~0x3F), hcr_base + HCONTROL); /* Disable interrupt coalescing control(icc), for the moment */ - DPRINTK("icc = 0x%x\n", ioread32(hcr_base + ICC)); + dev_dbg(host->dev, "icc = 0x%x\n", ioread32(hcr_base + ICC)); iowrite32(0x01000000, hcr_base + ICC); /* clear error registers, SError is cleared by libATA */ @@ -1380,8 +1377,8 @@ static int sata_fsl_init_controller(struct ata_host *host) * callback, that should also initiate the OOB, COMINIT sequence */ - DPRINTK("HStatus = 0x%x\n", ioread32(hcr_base + HSTATUS)); - DPRINTK("HControl = 0x%x\n", ioread32(hcr_base + HCONTROL)); + dev_dbg(host->dev, "HStatus = 0x%x HControl = 0x%x\n", + ioread32(hcr_base + HSTATUS), ioread32(hcr_base + HCONTROL)); return 0; } @@ -1470,9 +1467,7 @@ static int sata_fsl_probe(struct platform_device *ofdev) iowrite32(temp | TRANSCFG_RX_WATER_MARK, csr_base + TRANSCFG); } - DPRINTK("@reset i/o = 0x%x\n", ioread32(csr_base + TRANSCFG)); - DPRINTK("sizeof(cmd_desc) = %d\n", sizeof(struct command_desc)); - DPRINTK("sizeof(#define cmd_desc) = %d\n", SATA_FSL_CMD_DESC_SIZE); + ata_port_dbg(ap, "@reset i/o = 0x%x\n", ioread32(csr_base + TRANSCFG)); host_priv = kzalloc(sizeof(struct sata_fsl_host_priv), GFP_KERNEL); if (!host_priv) From fa538d4020e61ff3f71eb29516b4fc02ba129c33 Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Tue, 21 Dec 2021 08:20:43 +0100 Subject: [PATCH 243/493] ata: sata_rcar: replace DPRINTK() with ata_port_dbg() Signed-off-by: Hannes Reinecke Signed-off-by: Damien Le Moal --- drivers/ata/sata_rcar.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/ata/sata_rcar.c b/drivers/ata/sata_rcar.c index b4994d182eda..11e68e3854f8 100644 --- a/drivers/ata/sata_rcar.c +++ b/drivers/ata/sata_rcar.c @@ -679,7 +679,7 @@ static void sata_rcar_serr_interrupt(struct ata_port *ap) if (!serror) return; - DPRINTK("SError @host_intr: 0x%x\n", serror); + ata_port_dbg(ap, "SError @host_intr: 0x%x\n", serror); /* first, analyze and record host port events */ ata_ehi_clear_desc(ehi); From 1891b92a4cffaacd6c54684621440e6805a15e3b Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Tue, 21 Dec 2021 08:20:44 +0100 Subject: [PATCH 244/493] ata: sata_qstor: replace DPRINTK() with dev_dbg() Signed-off-by: Hannes Reinecke Signed-off-by: Damien Le Moal --- drivers/ata/sata_qstor.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/ata/sata_qstor.c b/drivers/ata/sata_qstor.c index ef00ab644afb..262b69549849 100644 --- a/drivers/ata/sata_qstor.c +++ b/drivers/ata/sata_qstor.c @@ -374,8 +374,8 @@ static inline unsigned int qs_intr_pkt(struct ata_host *host) struct qs_port_priv *pp = ap->private_data; struct ata_queued_cmd *qc; - DPRINTK("SFF=%08x%08x: sCHAN=%u sHST=%d sDST=%02x\n", - sff1, sff0, port_no, sHST, sDST); + dev_dbg(host->dev, "SFF=%08x%08x: sHST=%d sDST=%02x\n", + sff1, sff0, sHST, sDST); handled = 1; if (!pp || pp->state != qs_state_pkt) continue; From b5a5fc8b0f8175e4b3aaf182c1b23de4ccdd3347 Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Tue, 21 Dec 2021 08:20:45 +0100 Subject: [PATCH 245/493] ata: pata_pdc2027x: Replace PDPRINTK() with standard ata logging Use standard ata logging macros instead of the hand-crafted PDPRINTK and remove duplicate logging messages. Signed-off-by: Hannes Reinecke Signed-off-by: Damien Le Moal --- drivers/ata/pata_pdc2027x.c | 71 +++++++++++++++---------------------- 1 file changed, 28 insertions(+), 43 deletions(-) diff --git a/drivers/ata/pata_pdc2027x.c b/drivers/ata/pata_pdc2027x.c index effc1a09444d..4fbb3eed8b0b 100644 --- a/drivers/ata/pata_pdc2027x.c +++ b/drivers/ata/pata_pdc2027x.c @@ -30,13 +30,6 @@ #define DRV_NAME "pata_pdc2027x" #define DRV_VERSION "1.0" -#undef PDC_DEBUG - -#ifdef PDC_DEBUG -#define PDPRINTK(fmt, args...) printk(KERN_ERR "%s: " fmt, __func__, ## args) -#else -#define PDPRINTK(fmt, args...) -#endif enum { PDC_MMIO_BAR = 5, @@ -214,11 +207,11 @@ static int pdc2027x_cable_detect(struct ata_port *ap) if (cgcr & (1 << 26)) goto cbl40; - PDPRINTK("No cable or 80-conductor cable on port %d\n", ap->port_no); + ata_port_dbg(ap, "No cable or 80-conductor cable\n"); return ATA_CBL_PATA80; cbl40: - printk(KERN_INFO DRV_NAME ": 40-conductor cable detected on port %d\n", ap->port_no); + ata_port_info(ap, DRV_NAME ":40-conductor cable detected\n"); return ATA_CBL_PATA40; } @@ -292,17 +285,17 @@ static void pdc2027x_set_piomode(struct ata_port *ap, struct ata_device *adev) unsigned int pio = adev->pio_mode - XFER_PIO_0; u32 ctcr0, ctcr1; - PDPRINTK("adev->pio_mode[%X]\n", adev->pio_mode); + ata_port_dbg(ap, "adev->pio_mode[%X]\n", adev->pio_mode); /* Sanity check */ if (pio > 4) { - printk(KERN_ERR DRV_NAME ": Unknown pio mode [%d] ignored\n", pio); + ata_port_err(ap, "Unknown pio mode [%d] ignored\n", pio); return; } /* Set the PIO timing registers using value table for 133MHz */ - PDPRINTK("Set pio regs... \n"); + ata_port_dbg(ap, "Set pio regs... \n"); ctcr0 = ioread32(dev_mmio(ap, adev, PDC_CTCR0)); ctcr0 &= 0xffff0000; @@ -315,9 +308,7 @@ static void pdc2027x_set_piomode(struct ata_port *ap, struct ata_device *adev) ctcr1 |= (pdc2027x_pio_timing_tbl[pio].value2 << 24); iowrite32(ctcr1, dev_mmio(ap, adev, PDC_CTCR1)); - PDPRINTK("Set pio regs done\n"); - - PDPRINTK("Set to pio mode[%u] \n", pio); + ata_port_dbg(ap, "Set to pio mode[%u] \n", pio); } /** @@ -350,7 +341,7 @@ static void pdc2027x_set_dmamode(struct ata_port *ap, struct ata_device *adev) iowrite32(ctcr1 & ~(1 << 7), dev_mmio(ap, adev, PDC_CTCR1)); } - PDPRINTK("Set udma regs... \n"); + ata_port_dbg(ap, "Set udma regs... \n"); ctcr1 = ioread32(dev_mmio(ap, adev, PDC_CTCR1)); ctcr1 &= 0xff000000; @@ -359,16 +350,14 @@ static void pdc2027x_set_dmamode(struct ata_port *ap, struct ata_device *adev) (pdc2027x_udma_timing_tbl[udma_mode].value2 << 16); iowrite32(ctcr1, dev_mmio(ap, adev, PDC_CTCR1)); - PDPRINTK("Set udma regs done\n"); - - PDPRINTK("Set to udma mode[%u] \n", udma_mode); + ata_port_dbg(ap, "Set to udma mode[%u] \n", udma_mode); } else if ((dma_mode >= XFER_MW_DMA_0) && (dma_mode <= XFER_MW_DMA_2)) { /* Set the MDMA timing registers with value table for 133MHz */ unsigned int mdma_mode = dma_mode & 0x07; - PDPRINTK("Set mdma regs... \n"); + ata_port_dbg(ap, "Set mdma regs... \n"); ctcr0 = ioread32(dev_mmio(ap, adev, PDC_CTCR0)); ctcr0 &= 0x0000ffff; @@ -376,11 +365,10 @@ static void pdc2027x_set_dmamode(struct ata_port *ap, struct ata_device *adev) (pdc2027x_mdma_timing_tbl[mdma_mode].value1 << 24); iowrite32(ctcr0, dev_mmio(ap, adev, PDC_CTCR0)); - PDPRINTK("Set mdma regs done\n"); - PDPRINTK("Set to mdma mode[%u] \n", mdma_mode); + ata_port_dbg(ap, "Set to mdma mode[%u] \n", mdma_mode); } else { - printk(KERN_ERR DRV_NAME ": Unknown dma mode [%u] ignored\n", dma_mode); + ata_port_err(ap, "Unknown dma mode [%u] ignored\n", dma_mode); } } @@ -414,7 +402,7 @@ static int pdc2027x_set_mode(struct ata_link *link, struct ata_device **r_failed ctcr1 |= (1 << 25); iowrite32(ctcr1, dev_mmio(ap, dev, PDC_CTCR1)); - PDPRINTK("Turn on prefetch\n"); + ata_dev_dbg(dev, "Turn on prefetch\n"); } else { pdc2027x_set_dmamode(ap, dev); } @@ -485,8 +473,8 @@ retry: counter = (bccrh << 15) | bccrl; - PDPRINTK("bccrh [%X] bccrl [%X]\n", bccrh, bccrl); - PDPRINTK("bccrhv[%X] bccrlv[%X]\n", bccrhv, bccrlv); + dev_dbg(host->dev, "bccrh [%X] bccrl [%X]\n", bccrh, bccrl); + dev_dbg(host->dev, "bccrhv[%X] bccrlv[%X]\n", bccrhv, bccrlv); /* * The 30-bit decreasing counter are read by 2 pieces. @@ -495,7 +483,7 @@ retry: */ if (retry && !(bccrh == bccrhv && bccrl >= bccrlv)) { retry--; - PDPRINTK("rereading counter\n"); + dev_dbg(host->dev, "rereading counter\n"); goto retry; } @@ -520,20 +508,19 @@ static void pdc_adjust_pll(struct ata_host *host, long pll_clock, unsigned int b /* Sanity check */ if (unlikely(pll_clock_khz < 5000L || pll_clock_khz > 70000L)) { - printk(KERN_ERR DRV_NAME ": Invalid PLL input clock %ldkHz, give up!\n", pll_clock_khz); + dev_err(host->dev, "Invalid PLL input clock %ldkHz, give up!\n", + pll_clock_khz); return; } -#ifdef PDC_DEBUG - PDPRINTK("pout_required is %ld\n", pout_required); + dev_dbg(host->dev, "pout_required is %ld\n", pout_required); /* Show the current clock value of PLL control register * (maybe already configured by the firmware) */ pll_ctl = ioread16(mmio_base + PDC_PLL_CTL); - PDPRINTK("pll_ctl[%X]\n", pll_ctl); -#endif + dev_dbg(host->dev, "pll_ctl[%X]\n", pll_ctl); /* * Calculate the ratio of F, R and OD @@ -552,7 +539,7 @@ static void pdc_adjust_pll(struct ata_host *host, long pll_clock, unsigned int b R = 0x00; } else { /* Invalid ratio */ - printk(KERN_ERR DRV_NAME ": Invalid ratio %ld, give up!\n", ratio); + dev_err(host->dev, "Invalid ratio %ld, give up!\n", ratio); return; } @@ -560,15 +547,15 @@ static void pdc_adjust_pll(struct ata_host *host, long pll_clock, unsigned int b if (unlikely(F < 0 || F > 127)) { /* Invalid F */ - printk(KERN_ERR DRV_NAME ": F[%d] invalid!\n", F); + dev_err(host->dev, "F[%d] invalid!\n", F); return; } - PDPRINTK("F[%d] R[%d] ratio*1000[%ld]\n", F, R, ratio); + dev_dbg(host->dev, "F[%d] R[%d] ratio*1000[%ld]\n", F, R, ratio); pll_ctl = (R << 8) | F; - PDPRINTK("Writing pll_ctl[%X]\n", pll_ctl); + dev_dbg(host->dev, "Writing pll_ctl[%X]\n", pll_ctl); iowrite16(pll_ctl, mmio_base + PDC_PLL_CTL); ioread16(mmio_base + PDC_PLL_CTL); /* flush */ @@ -576,15 +563,13 @@ static void pdc_adjust_pll(struct ata_host *host, long pll_clock, unsigned int b /* Wait the PLL circuit to be stable */ msleep(30); -#ifdef PDC_DEBUG /* * Show the current clock value of PLL control register * (maybe configured by the firmware) */ pll_ctl = ioread16(mmio_base + PDC_PLL_CTL); - PDPRINTK("pll_ctl[%X]\n", pll_ctl); -#endif + dev_dbg(host->dev, "pll_ctl[%X]\n", pll_ctl); return; } @@ -605,7 +590,7 @@ static long pdc_detect_pll_input_clock(struct ata_host *host) /* Start the test mode */ scr = ioread32(mmio_base + PDC_SYS_CTL); - PDPRINTK("scr[%X]\n", scr); + dev_dbg(host->dev, "scr[%X]\n", scr); iowrite32(scr | (0x01 << 14), mmio_base + PDC_SYS_CTL); ioread32(mmio_base + PDC_SYS_CTL); /* flush */ @@ -622,7 +607,7 @@ static long pdc_detect_pll_input_clock(struct ata_host *host) /* Stop the test mode */ scr = ioread32(mmio_base + PDC_SYS_CTL); - PDPRINTK("scr[%X]\n", scr); + dev_dbg(host->dev, "scr[%X]\n", scr); iowrite32(scr & ~(0x01 << 14), mmio_base + PDC_SYS_CTL); ioread32(mmio_base + PDC_SYS_CTL); /* flush */ @@ -632,8 +617,8 @@ static long pdc_detect_pll_input_clock(struct ata_host *host) pll_clock = ((start_count - end_count) & 0x3fffffff) / 100 * (100000000 / usec_elapsed); - PDPRINTK("start[%ld] end[%ld] \n", start_count, end_count); - PDPRINTK("PLL input clock[%ld]Hz\n", pll_clock); + dev_dbg(host->dev, "start[%ld] end[%ld] PLL input clock[%ld]HZ\n", + start_count, end_count, pll_clock); return pll_clock; } From e1553351d747cbcd62db01d579dff916edcc782c Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Tue, 21 Dec 2021 08:20:46 +0100 Subject: [PATCH 246/493] ata: libata: remove pointless VPRINTK() calls Most of the information is already covered by tracepoints (if not downright pointless), so remove the VPRINTK() calls. And while we're at it, remove ata_scsi_dump_cdb(), too, as this information can be retrieved from scsi tracing. Signed-off-by: Hannes Reinecke Signed-off-by: Damien Le Moal --- drivers/ata/libata-core.c | 3 --- drivers/ata/libata-sata.c | 2 -- drivers/ata/libata-scsi.c | 42 --------------------------------------- drivers/ata/libata-sff.c | 4 ---- drivers/ata/libata.h | 1 - 5 files changed, 52 deletions(-) diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c index c9552606b015..447a46bdc820 100644 --- a/drivers/ata/libata-core.c +++ b/drivers/ata/libata-core.c @@ -4503,8 +4503,6 @@ static void ata_sg_clean(struct ata_queued_cmd *qc) WARN_ON_ONCE(sg == NULL); - VPRINTK("unmapping %u sg elements\n", qc->n_elem); - if (qc->n_elem) dma_unmap_sg(ap->dev, sg, qc->orig_n_elem, dir); @@ -4534,7 +4532,6 @@ static int ata_sg_setup(struct ata_queued_cmd *qc) if (n_elem < 1) return -1; - VPRINTK("%d sg elements mapped\n", n_elem); qc->orig_n_elem = qc->n_elem; qc->n_elem = n_elem; qc->flags |= ATA_QCFLAG_DMAMAP; diff --git a/drivers/ata/libata-sata.c b/drivers/ata/libata-sata.c index d9b5744a3b06..bfe9595d4f33 100644 --- a/drivers/ata/libata-sata.c +++ b/drivers/ata/libata-sata.c @@ -1258,8 +1258,6 @@ int ata_sas_queuecmd(struct scsi_cmnd *cmd, struct ata_port *ap) { int rc = 0; - ata_scsi_dump_cdb(ap, cmd); - if (likely(ata_dev_enabled(ap->link.device))) rc = __ata_scsi_queuecmd(cmd, ap->link.device); else { diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c index bfbb4cca4c17..11fb046e3035 100644 --- a/drivers/ata/libata-scsi.c +++ b/drivers/ata/libata-scsi.c @@ -1299,8 +1299,6 @@ static void scsi_6_lba_len(const u8 *cdb, u64 *plba, u32 *plen) u64 lba = 0; u32 len; - VPRINTK("six-byte command\n"); - lba |= ((u64)(cdb[1] & 0x1f)) << 16; lba |= ((u64)cdb[2]) << 8; lba |= ((u64)cdb[3]); @@ -1326,8 +1324,6 @@ static void scsi_10_lba_len(const u8 *cdb, u64 *plba, u32 *plen) u64 lba = 0; u32 len = 0; - VPRINTK("ten-byte command\n"); - lba |= ((u64)cdb[2]) << 24; lba |= ((u64)cdb[3]) << 16; lba |= ((u64)cdb[4]) << 8; @@ -1355,8 +1351,6 @@ static void scsi_16_lba_len(const u8 *cdb, u64 *plba, u32 *plen) u64 lba = 0; u32 len = 0; - VPRINTK("sixteen-byte command\n"); - lba |= ((u64)cdb[2]) << 56; lba |= ((u64)cdb[3]) << 48; lba |= ((u64)cdb[4]) << 40; @@ -1706,8 +1700,6 @@ static int ata_scsi_translate(struct ata_device *dev, struct scsi_cmnd *cmd, struct ata_queued_cmd *qc; int rc; - VPRINTK("ENTER\n"); - qc = ata_scsi_qc_new(dev, cmd); if (!qc) goto err_mem; @@ -1738,7 +1730,6 @@ static int ata_scsi_translate(struct ata_device *dev, struct scsi_cmnd *cmd, /* select device, send command to hardware */ ata_qc_issue(qc); - VPRINTK("EXIT\n"); return 0; early_finish: @@ -1851,8 +1842,6 @@ static unsigned int ata_scsiop_inq_std(struct ata_scsi_args *args, u8 *rbuf) 2 }; - VPRINTK("ENTER\n"); - /* set scsi removable (RMB) bit per ata bit, or if the * AHCI port says it's external (Hotplug-capable, eSATA). */ @@ -2287,8 +2276,6 @@ static unsigned int ata_scsiop_mode_sense(struct ata_scsi_args *args, u8 *rbuf) u8 dpofua, bp = 0xff; u16 fp; - VPRINTK("ENTER\n"); - six_byte = (scsicmd[0] == MODE_SENSE); ebd = !(scsicmd[1] & 0x8); /* dbd bit inverted == edb */ /* @@ -2406,8 +2393,6 @@ static unsigned int ata_scsiop_read_cap(struct ata_scsi_args *args, u8 *rbuf) log2_per_phys = ata_id_log2_per_physical_sector(dev->id); lowest_aligned = ata_id_logical_sector_offset(dev->id, log2_per_phys); - VPRINTK("ENTER\n"); - if (args->cmd->cmnd[0] == READ_CAPACITY) { if (last_lba >= 0xffffffffULL) last_lba = 0xffffffff; @@ -2474,7 +2459,6 @@ static unsigned int ata_scsiop_read_cap(struct ata_scsi_args *args, u8 *rbuf) */ static unsigned int ata_scsiop_report_luns(struct ata_scsi_args *args, u8 *rbuf) { - VPRINTK("ENTER\n"); rbuf[3] = 8; /* just one lun, LUN 0, size 8 bytes */ return 0; @@ -2570,8 +2554,6 @@ static void atapi_qc_complete(struct ata_queued_cmd *qc) struct scsi_cmnd *cmd = qc->scsicmd; unsigned int err_mask = qc->err_mask; - VPRINTK("ENTER, err_mask 0x%X\n", err_mask); - /* handle completion from new EH */ if (unlikely(qc->ap->ops->error_handler && (err_mask || qc->flags & ATA_QCFLAG_SENSE_VALID))) { @@ -3680,8 +3662,6 @@ static unsigned int ata_scsi_mode_select_xlat(struct ata_queued_cmd *qc) u8 buffer[64]; const u8 *p = buffer; - VPRINTK("ENTER\n"); - six_byte = (cdb[0] == MODE_SELECT); if (six_byte) { if (scmd->cmd_len < 5) { @@ -3979,26 +3959,6 @@ static inline ata_xlat_func_t ata_get_xlat_func(struct ata_device *dev, u8 cmd) return NULL; } -/** - * ata_scsi_dump_cdb - dump SCSI command contents to dmesg - * @ap: ATA port to which the command was being sent - * @cmd: SCSI command to dump - * - * Prints the contents of a SCSI command via printk(). - */ - -void ata_scsi_dump_cdb(struct ata_port *ap, struct scsi_cmnd *cmd) -{ -#ifdef ATA_VERBOSE_DEBUG - struct scsi_device *scsidev = cmd->device; - - VPRINTK("CDB (%u:%d,%d,%lld) %9ph\n", - ap->print_id, - scsidev->channel, scsidev->id, scsidev->lun, - cmd->cmnd); -#endif -} - int __ata_scsi_queuecmd(struct scsi_cmnd *scmd, struct ata_device *dev) { u8 scsi_op = scmd->cmnd[0]; @@ -4077,8 +4037,6 @@ int ata_scsi_queuecmd(struct Scsi_Host *shost, struct scsi_cmnd *cmd) spin_lock_irqsave(ap->lock, irq_flags); - ata_scsi_dump_cdb(ap, cmd); - dev = ata_scsi_find_dev(ap, scsidev); if (likely(dev)) rc = __ata_scsi_queuecmd(cmd, dev); diff --git a/drivers/ata/libata-sff.c b/drivers/ata/libata-sff.c index b544bdc6d0a3..d5dbeb68b2bf 100644 --- a/drivers/ata/libata-sff.c +++ b/drivers/ata/libata-sff.c @@ -877,8 +877,6 @@ static void atapi_pio_bytes(struct ata_queued_cmd *qc) if (unlikely(!bytes)) goto atapi_check; - VPRINTK("ata%u: xfering %d bytes\n", ap->print_id, bytes); - if (unlikely(__atapi_pio_bytes(qc, bytes))) goto err_out; ata_sff_sync(ap); /* flush */ @@ -2586,7 +2584,6 @@ static void ata_bmdma_fill_sg(struct ata_queued_cmd *qc) prd[pi].addr = cpu_to_le32(addr); prd[pi].flags_len = cpu_to_le32(len & 0xffff); - VPRINTK("PRD[%u] = (0x%X, 0x%X)\n", pi, addr, len); pi++; sg_len -= len; @@ -2646,7 +2643,6 @@ static void ata_bmdma_fill_sg_dumb(struct ata_queued_cmd *qc) prd[++pi].addr = cpu_to_le32(addr + 0x8000); } prd[pi].flags_len = cpu_to_le32(blen); - VPRINTK("PRD[%u] = (0x%X, 0x%X)\n", pi, addr, len); pi++; sg_len -= len; diff --git a/drivers/ata/libata.h b/drivers/ata/libata.h index 2144065e762c..51e01acdd241 100644 --- a/drivers/ata/libata.h +++ b/drivers/ata/libata.h @@ -148,7 +148,6 @@ extern int ata_scsi_user_scan(struct Scsi_Host *shost, unsigned int channel, unsigned int id, u64 lun); void ata_scsi_sdev_config(struct scsi_device *sdev); int ata_scsi_dev_config(struct scsi_device *sdev, struct ata_device *dev); -void ata_scsi_dump_cdb(struct ata_port *ap, struct scsi_cmnd *cmd); int __ata_scsi_queuecmd(struct scsi_cmnd *scmd, struct ata_device *dev); /* libata-eh.c */ From 93c7711494f47f9c829321e2a8711671b02f6e4c Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Tue, 21 Dec 2021 08:20:47 +0100 Subject: [PATCH 247/493] ata: ahci: Drop pointless VPRINTK() calls and convert the remaining ones Drop pointless VPRINTK() calls for entering and existing interrupt routines and convert the remaining calls to dev_dbg(). Signed-off-by: Hannes Reinecke Signed-off-by: Damien Le Moal --- drivers/ata/ahci.c | 4 +--- drivers/ata/ahci_xgene.c | 4 ---- drivers/ata/libahci.c | 18 ++++-------------- 3 files changed, 5 insertions(+), 21 deletions(-) diff --git a/drivers/ata/ahci.c b/drivers/ata/ahci.c index 3939afeca388..63bafb610fbd 100644 --- a/drivers/ata/ahci.c +++ b/drivers/ata/ahci.c @@ -684,7 +684,7 @@ static void ahci_pci_init_controller(struct ata_host *host) /* clear port IRQ */ tmp = readl(port_mmio + PORT_IRQ_STAT); - VPRINTK("PORT_IRQ_STAT 0x%x\n", tmp); + dev_dbg(&pdev->dev, "PORT_IRQ_STAT 0x%x\n", tmp); if (tmp) writel(tmp, port_mmio + PORT_IRQ_STAT); } @@ -1469,7 +1469,6 @@ static irqreturn_t ahci_thunderx_irq_handler(int irq, void *dev_instance) u32 irq_stat, irq_masked; unsigned int handled = 1; - VPRINTK("ENTER\n"); hpriv = host->private_data; mmio = hpriv->mmio; irq_stat = readl(mmio + HOST_IRQ_STAT); @@ -1486,7 +1485,6 @@ static irqreturn_t ahci_thunderx_irq_handler(int irq, void *dev_instance) irq_stat = readl(mmio + HOST_IRQ_STAT); spin_unlock(&host->lock); } while (irq_stat); - VPRINTK("EXIT\n"); return IRQ_RETVAL(handled); } diff --git a/drivers/ata/ahci_xgene.c b/drivers/ata/ahci_xgene.c index dffc432b9d54..4d8a186ec12a 100644 --- a/drivers/ata/ahci_xgene.c +++ b/drivers/ata/ahci_xgene.c @@ -588,8 +588,6 @@ static irqreturn_t xgene_ahci_irq_intr(int irq, void *dev_instance) void __iomem *mmio; u32 irq_stat, irq_masked; - VPRINTK("ENTER\n"); - hpriv = host->private_data; mmio = hpriv->mmio; @@ -612,8 +610,6 @@ static irqreturn_t xgene_ahci_irq_intr(int irq, void *dev_instance) spin_unlock(&host->lock); - VPRINTK("EXIT\n"); - return IRQ_RETVAL(rc); } diff --git a/drivers/ata/libahci.c b/drivers/ata/libahci.c index b6f674a1fddc..0ed484e04fd6 100644 --- a/drivers/ata/libahci.c +++ b/drivers/ata/libahci.c @@ -1234,12 +1234,12 @@ static void ahci_port_init(struct device *dev, struct ata_port *ap, /* clear SError */ tmp = readl(port_mmio + PORT_SCR_ERR); - VPRINTK("PORT_SCR_ERR 0x%x\n", tmp); + dev_dbg(dev, "PORT_SCR_ERR 0x%x\n", tmp); writel(tmp, port_mmio + PORT_SCR_ERR); /* clear port IRQ */ tmp = readl(port_mmio + PORT_IRQ_STAT); - VPRINTK("PORT_IRQ_STAT 0x%x\n", tmp); + dev_dbg(dev, "PORT_IRQ_STAT 0x%x\n", tmp); if (tmp) writel(tmp, port_mmio + PORT_IRQ_STAT); @@ -1270,10 +1270,10 @@ void ahci_init_controller(struct ata_host *host) } tmp = readl(mmio + HOST_CTL); - VPRINTK("HOST_CTL 0x%x\n", tmp); + dev_dbg(host->dev, "HOST_CTL 0x%x\n", tmp); writel(tmp | HOST_IRQ_EN, mmio + HOST_CTL); tmp = readl(mmio + HOST_CTL); - VPRINTK("HOST_CTL 0x%x\n", tmp); + dev_dbg(host->dev, "HOST_CTL 0x%x\n", tmp); } EXPORT_SYMBOL_GPL(ahci_init_controller); @@ -1911,8 +1911,6 @@ static irqreturn_t ahci_multi_irqs_intr_hard(int irq, void *dev_instance) void __iomem *port_mmio = ahci_port_base(ap); u32 status; - VPRINTK("ENTER\n"); - status = readl(port_mmio + PORT_IRQ_STAT); writel(status, port_mmio + PORT_IRQ_STAT); @@ -1920,8 +1918,6 @@ static irqreturn_t ahci_multi_irqs_intr_hard(int irq, void *dev_instance) ahci_handle_port_interrupt(ap, port_mmio, status); spin_unlock(ap->lock); - VPRINTK("EXIT\n"); - return IRQ_HANDLED; } @@ -1938,9 +1934,7 @@ u32 ahci_handle_port_intr(struct ata_host *host, u32 irq_masked) ap = host->ports[i]; if (ap) { ahci_port_intr(ap); - VPRINTK("port %u\n", i); } else { - VPRINTK("port %u (no irq)\n", i); if (ata_ratelimit()) dev_warn(host->dev, "interrupt on disabled port %u\n", i); @@ -1961,8 +1955,6 @@ static irqreturn_t ahci_single_level_irq_intr(int irq, void *dev_instance) void __iomem *mmio; u32 irq_stat, irq_masked; - VPRINTK("ENTER\n"); - hpriv = host->private_data; mmio = hpriv->mmio; @@ -1990,8 +1982,6 @@ static irqreturn_t ahci_single_level_irq_intr(int irq, void *dev_instance) spin_unlock(&host->lock); - VPRINTK("EXIT\n"); - return IRQ_RETVAL(rc); } From 51d628f10d55fc3c18685f63000efbd5c848320d Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Tue, 21 Dec 2021 08:20:48 +0100 Subject: [PATCH 248/493] ata: pdc_adma: Drop pointless VPRINTK() calls and remove disabled NCQ debugging Drop pointless VPRINTK() calls for entering routines and setting up sg tables. And while we're at it, remove the disabled debugging messages. Signed-off-by: Hannes Reinecke Signed-off-by: Damien Le Moal --- drivers/ata/pdc_adma.c | 31 ++----------------------------- 1 file changed, 2 insertions(+), 29 deletions(-) diff --git a/drivers/ata/pdc_adma.c b/drivers/ata/pdc_adma.c index 2c910c4cd4de..35b823ac20c9 100644 --- a/drivers/ata/pdc_adma.c +++ b/drivers/ata/pdc_adma.c @@ -284,9 +284,6 @@ static int adma_fill_sg(struct ata_queued_cmd *qc) *(__le32 *)(buf + i) = (pFLAGS & pEND) ? 0 : cpu_to_le32(pp->pkt_dma + i + 4); i += 4; - - VPRINTK("PRD[%u] = (0x%lX, 0x%X)\n", i/4, - (unsigned long)addr, len); } if (likely(last_buf)) @@ -302,8 +299,6 @@ static enum ata_completion_errors adma_qc_prep(struct ata_queued_cmd *qc) u32 pkt_dma = (u32)pp->pkt_dma; int i = 0; - VPRINTK("ENTER\n"); - adma_enter_reg_mode(qc->ap); if (qc->tf.protocol != ATA_PROT_DMA) return AC_ERR_OK; @@ -355,22 +350,6 @@ static enum ata_completion_errors adma_qc_prep(struct ata_queued_cmd *qc) i = adma_fill_sg(qc); wmb(); /* flush PRDs and pkt to memory */ -#if 0 - /* dump out CPB + PRDs for debug */ - { - int j, len = 0; - static char obuf[2048]; - for (j = 0; j < i; ++j) { - len += sprintf(obuf+len, "%02x ", buf[j]); - if ((j & 7) == 7) { - printk("%s\n", obuf); - len = 0; - } - } - if (len) - printk("%s\n", obuf); - } -#endif return AC_ERR_OK; } @@ -379,8 +358,6 @@ static inline void adma_packet_start(struct ata_queued_cmd *qc) struct ata_port *ap = qc->ap; void __iomem *chan = ADMA_PORT_REGS(ap); - VPRINTK("ENTER, ap %p\n", ap); - /* fire up the ADMA engine */ writew(aPIOMD4 | aGO, chan + ADMA_CONTROL); } @@ -502,14 +479,10 @@ static irqreturn_t adma_intr(int irq, void *dev_instance) struct ata_host *host = dev_instance; unsigned int handled = 0; - VPRINTK("ENTER\n"); - spin_lock(&host->lock); handled = adma_intr_pkt(host) | adma_intr_mmio(host); spin_unlock(&host->lock); - VPRINTK("EXIT\n"); - return IRQ_RETVAL(handled); } @@ -545,8 +518,8 @@ static int adma_port_start(struct ata_port *ap) return -ENOMEM; /* paranoia? */ if ((pp->pkt_dma & 7) != 0) { - printk(KERN_ERR "bad alignment for pp->pkt_dma: %08x\n", - (u32)pp->pkt_dma); + ata_port_err(ap, "bad alignment for pp->pkt_dma: %08x\n", + (u32)pp->pkt_dma); return -ENOMEM; } ap->private_data = pp; From d3e140f2b008e06072af9bfadf2294961bade897 Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Tue, 21 Dec 2021 08:20:49 +0100 Subject: [PATCH 249/493] ata: pata_octeon_cf: Drop pointless VPRINTK() calls and convert the remaining one Drop pointless VPRINTK() calls and convert the remaining calls to the existing bmdma tracepoint. Signed-off-by: Hannes Reinecke Signed-off-by: Damien Le Moal --- drivers/ata/pata_octeon_cf.c | 22 +--------------------- 1 file changed, 1 insertion(+), 21 deletions(-) diff --git a/drivers/ata/pata_octeon_cf.c b/drivers/ata/pata_octeon_cf.c index 07eda263b4c1..a2e7dcaa87ac 100644 --- a/drivers/ata/pata_octeon_cf.c +++ b/drivers/ata/pata_octeon_cf.c @@ -477,23 +477,11 @@ static void octeon_cf_tf_load16(struct ata_port *ap, __raw_writew(tf->hob_feature << 8, base + 0xc); __raw_writew(tf->hob_nsect | tf->hob_lbal << 8, base + 2); __raw_writew(tf->hob_lbam | tf->hob_lbah << 8, base + 4); - VPRINTK("hob: feat 0x%X nsect 0x%X, lba 0x%X 0x%X 0x%X\n", - tf->hob_feature, - tf->hob_nsect, - tf->hob_lbal, - tf->hob_lbam, - tf->hob_lbah); } if (is_addr) { __raw_writew(tf->feature << 8, base + 0xc); __raw_writew(tf->nsect | tf->lbal << 8, base + 2); __raw_writew(tf->lbam | tf->lbah << 8, base + 4); - VPRINTK("feat 0x%X nsect 0x%X, lba 0x%X 0x%X 0x%X\n", - tf->feature, - tf->nsect, - tf->lbal, - tf->lbam, - tf->lbah); } ata_wait_idle(ap); } @@ -553,8 +541,6 @@ static void octeon_cf_dma_start(struct ata_queued_cmd *qc) union cvmx_mio_boot_dma_intx mio_boot_dma_int; struct scatterlist *sg; - VPRINTK("%d scatterlists\n", qc->n_elem); - /* Get the scatter list entry we need to DMA into */ sg = qc->cursg; BUG_ON(!sg); @@ -595,10 +581,6 @@ static void octeon_cf_dma_start(struct ata_queued_cmd *qc) mio_boot_dma_cfg.s.adr = sg_dma_address(sg); - VPRINTK("%s %d bytes address=%p\n", - (mio_boot_dma_cfg.s.rw) ? "write" : "read", sg->length, - (void *)(unsigned long)mio_boot_dma_cfg.s.adr); - cvmx_write_csr(cf_port->dma_base + DMA_CFG, mio_boot_dma_cfg.u64); } @@ -617,9 +599,7 @@ static unsigned int octeon_cf_dma_finished(struct ata_port *ap, union cvmx_mio_boot_dma_intx dma_int; u8 status; - VPRINTK("ata%u: protocol %d task_state %d\n", - ap->print_id, qc->tf.protocol, ap->hsm_task_state); - + trace_ata_bmdma_stop(qc, &qc->tf, qc->tag); if (ap->hsm_task_state != HSM_ST_LAST) return 0; From 9913d3902f8f5e30984ae8a716e34935f553cc48 Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Tue, 21 Dec 2021 08:20:50 +0100 Subject: [PATCH 250/493] ata: pata_via: Drop pointless VPRINTK() calls Signed-off-by: Hannes Reinecke Signed-off-by: Damien Le Moal --- drivers/ata/pata_via.c | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/drivers/ata/pata_via.c b/drivers/ata/pata_via.c index 475032048984..439ca882f73c 100644 --- a/drivers/ata/pata_via.c +++ b/drivers/ata/pata_via.c @@ -414,12 +414,6 @@ static void via_tf_load(struct ata_port *ap, const struct ata_taskfile *tf) iowrite8(tf->hob_lbal, ioaddr->lbal_addr); iowrite8(tf->hob_lbam, ioaddr->lbam_addr); iowrite8(tf->hob_lbah, ioaddr->lbah_addr); - VPRINTK("hob: feat 0x%X nsect 0x%X, lba 0x%X 0x%X 0x%X\n", - tf->hob_feature, - tf->hob_nsect, - tf->hob_lbal, - tf->hob_lbam, - tf->hob_lbah); } if (is_addr) { @@ -428,12 +422,6 @@ static void via_tf_load(struct ata_port *ap, const struct ata_taskfile *tf) iowrite8(tf->lbal, ioaddr->lbal_addr); iowrite8(tf->lbam, ioaddr->lbam_addr); iowrite8(tf->lbah, ioaddr->lbah_addr); - VPRINTK("feat 0x%X nsect 0x%X lba 0x%X 0x%X 0x%X\n", - tf->feature, - tf->nsect, - tf->lbal, - tf->lbam, - tf->lbah); } ata_wait_idle(ap); From 156e67cc0dba1463fbc9bf3b327b642079b5a9fb Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Tue, 21 Dec 2021 08:20:51 +0100 Subject: [PATCH 251/493] ata: sata_promise: Drop pointless VPRINTK() calls and convert the remaining ones Drop pointless VPRINTK() calls for entering and existing interrupt routines and convert the remaining calls to ata_port_dbg(). Signed-off-by: Hannes Reinecke Signed-off-by: Damien Le Moal --- drivers/ata/sata_promise.c | 31 ++++++++----------------------- 1 file changed, 8 insertions(+), 23 deletions(-) diff --git a/drivers/ata/sata_promise.c b/drivers/ata/sata_promise.c index 7815da8ef9e5..b8465fef2ed2 100644 --- a/drivers/ata/sata_promise.c +++ b/drivers/ata/sata_promise.c @@ -596,7 +596,8 @@ static void pdc_fill_sg(struct ata_queued_cmd *qc) prd[idx].addr = cpu_to_le32(addr); prd[idx].flags_len = cpu_to_le32(len & 0xffff); - VPRINTK("PRD[%u] = (0x%X, 0x%X)\n", idx, addr, len); + ata_port_dbg(ap, "PRD[%u] = (0x%X, 0x%X)\n", + idx, addr, len); idx++; sg_len -= len; @@ -609,17 +610,16 @@ static void pdc_fill_sg(struct ata_queued_cmd *qc) if (len > SG_COUNT_ASIC_BUG) { u32 addr; - VPRINTK("Splitting last PRD.\n"); - addr = le32_to_cpu(prd[idx - 1].addr); prd[idx - 1].flags_len = cpu_to_le32(len - SG_COUNT_ASIC_BUG); - VPRINTK("PRD[%u] = (0x%X, 0x%X)\n", idx - 1, addr, SG_COUNT_ASIC_BUG); + ata_port_dbg(ap, "PRD[%u] = (0x%X, 0x%X)\n", + idx - 1, addr, SG_COUNT_ASIC_BUG); addr = addr + len - SG_COUNT_ASIC_BUG; len = SG_COUNT_ASIC_BUG; prd[idx].addr = cpu_to_le32(addr); prd[idx].flags_len = cpu_to_le32(len); - VPRINTK("PRD[%u] = (0x%X, 0x%X)\n", idx, addr, len); + ata_port_dbg(ap, "PRD[%u] = (0x%X, 0x%X)\n", idx, addr, len); idx++; } @@ -632,8 +632,6 @@ static enum ata_completion_errors pdc_qc_prep(struct ata_queued_cmd *qc) struct pdc_port_priv *pp = qc->ap->private_data; unsigned int i; - VPRINTK("ENTER\n"); - switch (qc->tf.protocol) { case ATA_PROT_DMA: pdc_fill_sg(qc); @@ -922,12 +920,8 @@ static irqreturn_t pdc_interrupt(int irq, void *dev_instance) u32 hotplug_status; int is_sataii_tx4; - VPRINTK("ENTER\n"); - - if (!host || !host->iomap[PDC_MMIO_BAR]) { - VPRINTK("QUICK EXIT\n"); + if (!host || !host->iomap[PDC_MMIO_BAR]) return IRQ_NONE; - } host_mmio = host->iomap[PDC_MMIO_BAR]; @@ -946,23 +940,18 @@ static irqreturn_t pdc_interrupt(int irq, void *dev_instance) /* reading should also clear interrupts */ mask = readl(host_mmio + PDC_INT_SEQMASK); - if (mask == 0xffffffff && hotplug_status == 0) { - VPRINTK("QUICK EXIT 2\n"); + if (mask == 0xffffffff && hotplug_status == 0) goto done_irq; - } mask &= 0xffff; /* only 16 SEQIDs possible */ - if (mask == 0 && hotplug_status == 0) { - VPRINTK("QUICK EXIT 3\n"); + if (mask == 0 && hotplug_status == 0) goto done_irq; - } writel(mask, host_mmio + PDC_INT_SEQMASK); is_sataii_tx4 = pdc_is_sataii_tx4(host->ports[0]->flags); for (i = 0; i < host->n_ports; i++) { - VPRINTK("port %u\n", i); ap = host->ports[i]; /* check for a plug or unplug event */ @@ -989,8 +978,6 @@ static irqreturn_t pdc_interrupt(int irq, void *dev_instance) } } - VPRINTK("EXIT\n"); - done_irq: spin_unlock(&host->lock); return IRQ_RETVAL(handled); @@ -1005,8 +992,6 @@ static void pdc_packet_start(struct ata_queued_cmd *qc) unsigned int port_no = ap->port_no; u8 seq = (u8) (port_no + 1); - VPRINTK("ENTER, ap %p\n", ap); - writel(0x00000001, host_mmio + (seq * 4)); readl(host_mmio + (seq * 4)); /* flush */ From 05d8501fbf063914c19c633e0e6078e9948c129b Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Tue, 21 Dec 2021 08:20:52 +0100 Subject: [PATCH 252/493] ata: sata_qstor: Drop pointless VPRINTK() calls Signed-off-by: Hannes Reinecke Signed-off-by: Damien Le Moal --- drivers/ata/sata_qstor.c | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/drivers/ata/sata_qstor.c b/drivers/ata/sata_qstor.c index 262b69549849..8ca0810aad26 100644 --- a/drivers/ata/sata_qstor.c +++ b/drivers/ata/sata_qstor.c @@ -252,9 +252,6 @@ static unsigned int qs_fill_sg(struct ata_queued_cmd *qc) len = sg_dma_len(sg); *(__le32 *)prd = cpu_to_le32(len); prd += sizeof(u64); - - VPRINTK("PRD[%u] = (0x%llX, 0x%X)\n", si, - (unsigned long long)addr, len); } return si; @@ -268,8 +265,6 @@ static enum ata_completion_errors qs_qc_prep(struct ata_queued_cmd *qc) u64 addr; unsigned int nelem; - VPRINTK("ENTER\n"); - qs_enter_reg_mode(qc->ap); if (qc->tf.protocol != ATA_PROT_DMA) return AC_ERR_OK; @@ -304,8 +299,6 @@ static inline void qs_packet_start(struct ata_queued_cmd *qc) struct ata_port *ap = qc->ap; u8 __iomem *chan = qs_mmio_base(ap->host) + (ap->port_no * 0x4000); - VPRINTK("ENTER, ap %p\n", ap); - writeb(QS_CTR0_CLER, chan + QS_CCT_CTR0); wmb(); /* flush PRDs and pkt to memory */ writel(QS_CCF_RUN_PKT, chan + QS_CCT_CFF); @@ -435,14 +428,10 @@ static irqreturn_t qs_intr(int irq, void *dev_instance) unsigned int handled = 0; unsigned long flags; - VPRINTK("ENTER\n"); - spin_lock_irqsave(&host->lock, flags); handled = qs_intr_pkt(host) | qs_intr_mmio(host); spin_unlock_irqrestore(&host->lock, flags); - VPRINTK("EXIT\n"); - return IRQ_RETVAL(handled); } From 559ba1830e4bd7ad71b44ab5d0f75d7a206f75ed Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Tue, 21 Dec 2021 08:20:53 +0100 Subject: [PATCH 253/493] ata: sata_rcar: Drop pointless VPRINTK() calls Signed-off-by: Hannes Reinecke Signed-off-by: Damien Le Moal --- drivers/ata/sata_rcar.c | 17 +---------------- 1 file changed, 1 insertion(+), 16 deletions(-) diff --git a/drivers/ata/sata_rcar.c b/drivers/ata/sata_rcar.c index 11e68e3854f8..91b39a6aa9f7 100644 --- a/drivers/ata/sata_rcar.c +++ b/drivers/ata/sata_rcar.c @@ -379,12 +379,6 @@ static void sata_rcar_tf_load(struct ata_port *ap, iowrite32(tf->hob_lbal, ioaddr->lbal_addr); iowrite32(tf->hob_lbam, ioaddr->lbam_addr); iowrite32(tf->hob_lbah, ioaddr->lbah_addr); - VPRINTK("hob: feat 0x%X nsect 0x%X, lba 0x%X 0x%X 0x%X\n", - tf->hob_feature, - tf->hob_nsect, - tf->hob_lbal, - tf->hob_lbam, - tf->hob_lbah); } if (is_addr) { @@ -393,18 +387,10 @@ static void sata_rcar_tf_load(struct ata_port *ap, iowrite32(tf->lbal, ioaddr->lbal_addr); iowrite32(tf->lbam, ioaddr->lbam_addr); iowrite32(tf->lbah, ioaddr->lbah_addr); - VPRINTK("feat 0x%X nsect 0x%X lba 0x%X 0x%X 0x%X\n", - tf->feature, - tf->nsect, - tf->lbal, - tf->lbam, - tf->lbah); } - if (tf->flags & ATA_TFLAG_DEVICE) { + if (tf->flags & ATA_TFLAG_DEVICE) iowrite32(tf->device, ioaddr->device_addr); - VPRINTK("device 0x%X\n", tf->device); - } ata_wait_idle(ap); } @@ -537,7 +523,6 @@ static void sata_rcar_bmdma_fill_sg(struct ata_queued_cmd *qc) prd[si].addr = cpu_to_le32(addr); prd[si].flags_len = cpu_to_le32(sg_len); - VPRINTK("PRD[%u] = (0x%X, 0x%X)\n", si, addr, sg_len); } /* end-of-table flag */ From a0a8005d8642ce29596827a100c6cdc84bbbfb5c Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Tue, 21 Dec 2021 08:20:54 +0100 Subject: [PATCH 254/493] ata: sata_inic162x: Drop pointless VPRINTK() calls Signed-off-by: Hannes Reinecke Signed-off-by: Damien Le Moal --- drivers/ata/sata_inic162x.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/ata/sata_inic162x.c b/drivers/ata/sata_inic162x.c index b6239dae524a..781901151d82 100644 --- a/drivers/ata/sata_inic162x.c +++ b/drivers/ata/sata_inic162x.c @@ -488,8 +488,6 @@ static enum ata_completion_errors inic_qc_prep(struct ata_queued_cmd *qc) bool is_data = ata_is_data(qc->tf.protocol); unsigned int cdb_len = 0; - VPRINTK("ENTER\n"); - if (is_atapi) cdb_len = qc->dev->cdb_len; From 23b87b9f6ffe89806f8707e5963e509881e2e0fd Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Tue, 21 Dec 2021 08:20:55 +0100 Subject: [PATCH 255/493] ata: sata_mv: Drop pointless VPRINTK() call and convert the remaining one Drop pointless VPRINTK() call and convert the remaining one to dev_dbg(). Signed-off-by: Hannes Reinecke Signed-off-by: Damien Le Moal --- drivers/ata/sata_mv.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/drivers/ata/sata_mv.c b/drivers/ata/sata_mv.c index f00540641027..70743cd50a97 100644 --- a/drivers/ata/sata_mv.c +++ b/drivers/ata/sata_mv.c @@ -3716,11 +3716,6 @@ static void mv_port_init(struct ata_ioports *port, void __iomem *port_mmio) /* unmask all non-transient EDMA error interrupts */ writelfl(~EDMA_ERR_IRQ_TRANSIENT, port_mmio + EDMA_ERR_IRQ_MASK); - - VPRINTK("EDMA cfg=0x%08x EDMA IRQ err cause/mask=0x%08x/0x%08x\n", - readl(port_mmio + EDMA_CFG), - readl(port_mmio + EDMA_ERR_IRQ_CAUSE), - readl(port_mmio + EDMA_ERR_IRQ_MASK)); } static unsigned int mv_in_pcix_mode(struct ata_host *host) @@ -3965,7 +3960,7 @@ static int mv_init_host(struct ata_host *host) for (hc = 0; hc < n_hc; hc++) { void __iomem *hc_mmio = mv_hc_base(mmio, hc); - VPRINTK("HC%i: HC config=0x%08x HC IRQ cause " + dev_dbg(host->dev, "HC%i: HC config=0x%08x HC IRQ cause " "(before clear)=0x%08x\n", hc, readl(hc_mmio + HC_CFG), readl(hc_mmio + HC_IRQ_CAUSE)); From 47013c580c73c3870796ca5193a24e3334da4105 Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Tue, 21 Dec 2021 08:20:56 +0100 Subject: [PATCH 256/493] ata: sata_nv: drop pointless VPRINTK() calls and convert remaining ones Quite some information from the VPRINTK() is already covered by tracepoints, so remove the pointless calls and convert the remaining ones to structured logging. Signed-off-by: Hannes Reinecke Signed-off-by: Damien Le Moal --- drivers/ata/sata_nv.c | 38 ++++++++++++++------------------------ 1 file changed, 14 insertions(+), 24 deletions(-) diff --git a/drivers/ata/sata_nv.c b/drivers/ata/sata_nv.c index 06d381b9764e..7f14d0d31057 100644 --- a/drivers/ata/sata_nv.c +++ b/drivers/ata/sata_nv.c @@ -809,7 +809,7 @@ static int nv_adma_check_cpb(struct ata_port *ap, int cpb_num, int force_err) struct nv_adma_port_priv *pp = ap->private_data; u8 flags = pp->cpb[cpb_num].resp_flags; - VPRINTK("CPB %d, flags=0x%x\n", cpb_num, flags); + ata_port_dbg(ap, "CPB %d, flags=0x%x\n", cpb_num, flags); if (unlikely((force_err || flags & (NV_CPB_RESP_ATA_ERR | @@ -1101,8 +1101,6 @@ static int nv_adma_port_start(struct ata_port *ap) struct pci_dev *pdev = to_pci_dev(dev); u16 tmp; - VPRINTK("ENTER\n"); - /* * Ensure DMA mask is set to 32-bit before allocating legacy PRD and * pad buffers. @@ -1191,7 +1189,6 @@ static void nv_adma_port_stop(struct ata_port *ap) struct nv_adma_port_priv *pp = ap->private_data; void __iomem *mmio = pp->ctl_block; - VPRINTK("ENTER\n"); writew(0, mmio + NV_ADMA_CTL); } @@ -1253,8 +1250,6 @@ static void nv_adma_setup_port(struct ata_port *ap) void __iomem *mmio = ap->host->iomap[NV_MMIO_BAR]; struct ata_ioports *ioport = &ap->ioaddr; - VPRINTK("ENTER\n"); - mmio += NV_ADMA_PORT + ap->port_no * NV_ADMA_PORT_SIZE; ioport->cmd_addr = mmio; @@ -1375,8 +1370,6 @@ static enum ata_completion_errors nv_adma_qc_prep(struct ata_queued_cmd *qc) if (qc->tf.protocol == ATA_PROT_NCQ) ctl_flags |= NV_CPB_CTL_QUEUE | NV_CPB_CTL_FPDMA; - VPRINTK("qc->flags = 0x%lx\n", qc->flags); - nv_adma_tf_to_cpb(&qc->tf, cpb->tf); if (qc->flags & ATA_QCFLAG_DMAMAP) { @@ -1401,8 +1394,6 @@ static unsigned int nv_adma_qc_issue(struct ata_queued_cmd *qc) void __iomem *mmio = pp->ctl_block; int curr_ncq = (qc->tf.protocol == ATA_PROT_NCQ); - VPRINTK("ENTER\n"); - /* We can't handle result taskfile with NCQ commands, since retrieving the taskfile switches us out of ADMA mode and would abort existing commands. */ @@ -1414,7 +1405,6 @@ static unsigned int nv_adma_qc_issue(struct ata_queued_cmd *qc) if (nv_adma_use_reg_mode(qc)) { /* use ATA register mode */ - VPRINTK("using ATA register mode: 0x%lx\n", qc->flags); BUG_ON(!(pp->flags & NV_ADMA_ATAPI_SETUP_COMPLETE) && (qc->flags & ATA_QCFLAG_DMAMAP)); nv_adma_register_mode(qc->ap); @@ -1866,12 +1856,12 @@ static void nv_swncq_host_init(struct ata_host *host) /* enable swncq */ tmp = readl(mmio + NV_CTL_MCP55); - VPRINTK("HOST_CTL:0x%X\n", tmp); + dev_dbg(&pdev->dev, "HOST_CTL:0x%X\n", tmp); writel(tmp | NV_CTL_PRI_SWNCQ | NV_CTL_SEC_SWNCQ, mmio + NV_CTL_MCP55); /* enable irq intr */ tmp = readl(mmio + NV_INT_ENABLE_MCP55); - VPRINTK("HOST_ENABLE:0x%X\n", tmp); + dev_dbg(&pdev->dev, "HOST_ENABLE:0x%X\n", tmp); writel(tmp | 0x00fd00fd, mmio + NV_INT_ENABLE_MCP55); /* clear port irq */ @@ -2101,7 +2091,7 @@ static int nv_swncq_sdbfis(struct ata_port *ap) ata_qc_complete_multiple(ap, ata_qc_get_active(ap) ^ done_mask); if (!ap->qc_active) { - DPRINTK("over\n"); + ata_port_dbg(ap, "over\n"); nv_swncq_pp_reinit(ap); return 0; } @@ -2116,12 +2106,12 @@ static int nv_swncq_sdbfis(struct ata_port *ap) */ lack_dhfis = 1; - DPRINTK("id 0x%x QC: qc_active 0x%llx," - "SWNCQ:qc_active 0x%X defer_bits %X " - "dhfis 0x%X dmafis 0x%X last_issue_tag %x\n", - ap->print_id, ap->qc_active, pp->qc_active, - pp->defer_queue.defer_bits, pp->dhfis_bits, - pp->dmafis_bits, pp->last_issue_tag); + ata_port_dbg(ap, "QC: qc_active 0x%llx," + "SWNCQ:qc_active 0x%X defer_bits %X " + "dhfis 0x%X dmafis 0x%X last_issue_tag %x\n", + ap->qc_active, pp->qc_active, + pp->defer_queue.defer_bits, pp->dhfis_bits, + pp->dmafis_bits, pp->last_issue_tag); nv_swncq_fis_reinit(ap); @@ -2161,7 +2151,7 @@ static void nv_swncq_dmafis(struct ata_port *ap) __ata_bmdma_stop(ap); tag = nv_swncq_tag(ap); - DPRINTK("dma setup tag 0x%x\n", tag); + ata_port_dbg(ap, "dma setup tag 0x%x\n", tag); qc = ata_qc_from_tag(ap, tag); if (unlikely(!qc)) @@ -2229,9 +2219,9 @@ static void nv_swncq_host_interrupt(struct ata_port *ap, u16 fis) if (fis & NV_SWNCQ_IRQ_SDBFIS) { pp->ncq_flags |= ncq_saw_sdb; - DPRINTK("id 0x%x SWNCQ: qc_active 0x%X " + ata_port_dbg(ap, "SWNCQ: qc_active 0x%X " "dhfis 0x%X dmafis 0x%X sactive 0x%X\n", - ap->print_id, pp->qc_active, pp->dhfis_bits, + pp->qc_active, pp->dhfis_bits, pp->dmafis_bits, readl(pp->sactive_block)); if (nv_swncq_sdbfis(ap) < 0) goto irq_error; @@ -2257,7 +2247,7 @@ static void nv_swncq_host_interrupt(struct ata_port *ap, u16 fis) goto irq_exit; if (pp->defer_queue.defer_bits) { - DPRINTK("send next command\n"); + ata_port_dbg(ap, "send next command\n"); qc = nv_swncq_qc_from_dq(ap); nv_swncq_issue_atacmd(ap, qc); } From 14d3630608db7928b26a6e2272ff0e4d298ff910 Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Tue, 21 Dec 2021 08:20:57 +0100 Subject: [PATCH 257/493] ata: sata_fsl: convert VPRINTK() calls to ata_port_dbg() Signed-off-by: Hannes Reinecke Signed-off-by: Damien Le Moal --- drivers/ata/sata_fsl.c | 80 +++++++++++++++++------------------------- 1 file changed, 33 insertions(+), 47 deletions(-) diff --git a/drivers/ata/sata_fsl.c b/drivers/ata/sata_fsl.c index 3afd727f1a4f..142e65d5efc7 100644 --- a/drivers/ata/sata_fsl.c +++ b/drivers/ata/sata_fsl.c @@ -406,7 +406,8 @@ static inline unsigned int sata_fsl_tag(struct ata_port *ap, return tag; } -static void sata_fsl_setup_cmd_hdr_entry(struct sata_fsl_port_priv *pp, +static void sata_fsl_setup_cmd_hdr_entry(struct ata_port *ap, + struct sata_fsl_port_priv *pp, unsigned int tag, u32 desc_info, u32 data_xfer_len, u8 num_prde, u8 fis_len) @@ -424,7 +425,7 @@ static void sata_fsl_setup_cmd_hdr_entry(struct sata_fsl_port_priv *pp, pp->cmdslot[tag].ttl = cpu_to_le32(data_xfer_len & ~0x03); pp->cmdslot[tag].desc_info = cpu_to_le32(desc_info | (tag & 0x1F)); - VPRINTK("cda=0x%x, prde_fis_len=0x%x, ttl=0x%x, di=0x%x\n", + ata_port_dbg(ap, "cda=0x%x, prde_fis_len=0x%x, ttl=0x%x, di=0x%x\n", pp->cmdslot[tag].cda, pp->cmdslot[tag].prde_fis_len, pp->cmdslot[tag].ttl, pp->cmdslot[tag].desc_info); @@ -450,8 +451,6 @@ static unsigned int sata_fsl_fill_sg(struct ata_queued_cmd *qc, void *cmd_desc, dma_addr_t indirect_ext_segment_paddr; unsigned int si; - VPRINTK("SATA FSL : cd = 0x%p, prd = 0x%p\n", cmd_desc, prd); - indirect_ext_segment_paddr = cmd_desc_paddr + SATA_FSL_CMD_DESC_OFFSET_TO_PRDT + SATA_FSL_MAX_PRD_DIRECT * 16; @@ -459,9 +458,6 @@ static unsigned int sata_fsl_fill_sg(struct ata_queued_cmd *qc, void *cmd_desc, dma_addr_t sg_addr = sg_dma_address(sg); u32 sg_len = sg_dma_len(sg); - VPRINTK("SATA FSL : fill_sg, sg_addr = 0x%llx, sg_len = %d\n", - (unsigned long long)sg_addr, sg_len); - /* warn if each s/g element is not dword aligned */ if (unlikely(sg_addr & 0x03)) ata_port_err(qc->ap, "s/g addr unaligned : 0x%llx\n", @@ -472,7 +468,6 @@ static unsigned int sata_fsl_fill_sg(struct ata_queued_cmd *qc, void *cmd_desc, if (num_prde == (SATA_FSL_MAX_PRD_DIRECT - 1) && sg_next(sg) != NULL) { - VPRINTK("setting indirect prde\n"); prd_ptr_to_indirect_ext = prd; prd->dba = cpu_to_le32(indirect_ext_segment_paddr); indirect_ext_segment_sz = 0; @@ -484,9 +479,6 @@ static unsigned int sata_fsl_fill_sg(struct ata_queued_cmd *qc, void *cmd_desc, prd->dba = cpu_to_le32(sg_addr); prd->ddc_and_ext = cpu_to_le32(data_snoop | (sg_len & ~0x03)); - VPRINTK("sg_fill, ttl=%d, dba=0x%x, ddc=0x%x\n", - ttl_dwords, prd->dba, prd->ddc_and_ext); - ++num_prde; ++prd; if (prd_ptr_to_indirect_ext) @@ -523,14 +515,6 @@ static enum ata_completion_errors sata_fsl_qc_prep(struct ata_queued_cmd *qc) ata_tf_to_fis(&qc->tf, qc->dev->link->pmp, 1, (u8 *) &cd->cfis); - VPRINTK("Dumping cfis : 0x%x, 0x%x, 0x%x\n", - cd->cfis[0], cd->cfis[1], cd->cfis[2]); - - if (qc->tf.protocol == ATA_PROT_NCQ) { - VPRINTK("FPDMA xfer,Sctor cnt[0:7],[8:15] = %d,%d\n", - cd->cfis[3], cd->cfis[11]); - } - /* setup "ACMD - atapi command" in cmd. desc. if this is ATAPI cmd */ if (ata_is_atapi(qc->tf.protocol)) { desc_info |= ATAPI_CMD; @@ -546,10 +530,10 @@ static enum ata_completion_errors sata_fsl_qc_prep(struct ata_queued_cmd *qc) if (qc->tf.protocol == ATA_PROT_NCQ) desc_info |= FPDMA_QUEUED_CMD; - sata_fsl_setup_cmd_hdr_entry(pp, tag, desc_info, ttl_dwords, + sata_fsl_setup_cmd_hdr_entry(ap, pp, tag, desc_info, ttl_dwords, num_prde, 5); - VPRINTK("SATA FSL : xx_qc_prep, di = 0x%x, ttl = %d, num_prde = %d\n", + ata_port_dbg(ap, "SATA FSL : di = 0x%x, ttl = %d, num_prde = %d\n", desc_info, ttl_dwords, num_prde); return AC_ERR_OK; @@ -562,7 +546,7 @@ static unsigned int sata_fsl_qc_issue(struct ata_queued_cmd *qc) void __iomem *hcr_base = host_priv->hcr_base; unsigned int tag = sata_fsl_tag(ap, qc->hw_tag, hcr_base); - VPRINTK("xx_qc_issue called,CQ=0x%x,CA=0x%x,CE=0x%x,CC=0x%x\n", + ata_port_dbg(ap, "CQ=0x%x,CA=0x%x,CE=0x%x,CC=0x%x\n", ioread32(CQ + hcr_base), ioread32(CA + hcr_base), ioread32(CE + hcr_base), ioread32(CC + hcr_base)); @@ -572,10 +556,10 @@ static unsigned int sata_fsl_qc_issue(struct ata_queued_cmd *qc) /* Simply queue command to the controller/device */ iowrite32(1 << tag, CQ + hcr_base); - VPRINTK("xx_qc_issue called, tag=%d, CQ=0x%x, CA=0x%x\n", + ata_port_dbg(ap, "tag=%d, CQ=0x%x, CA=0x%x\n", tag, ioread32(CQ + hcr_base), ioread32(CA + hcr_base)); - VPRINTK("CE=0x%x, DE=0x%x, CC=0x%x, CmdStat = 0x%x\n", + ata_port_dbg(ap, "CE=0x%x, DE=0x%x, CC=0x%x, CmdStat = 0x%x\n", ioread32(CE + hcr_base), ioread32(DE + hcr_base), ioread32(CC + hcr_base), @@ -616,7 +600,7 @@ static int sata_fsl_scr_write(struct ata_link *link, return -EINVAL; } - VPRINTK("xx_scr_write, reg_in = %d\n", sc_reg); + ata_link_dbg(link, "reg_in = %d\n", sc_reg); iowrite32(val, ssr_base + (sc_reg * 4)); return 0; @@ -640,7 +624,7 @@ static int sata_fsl_scr_read(struct ata_link *link, return -EINVAL; } - VPRINTK("xx_scr_read, reg_in = %d\n", sc_reg); + ata_link_dbg(link, "reg_in = %d\n", sc_reg); *val = ioread32(ssr_base + (sc_reg * 4)); return 0; @@ -652,18 +636,18 @@ static void sata_fsl_freeze(struct ata_port *ap) void __iomem *hcr_base = host_priv->hcr_base; u32 temp; - VPRINTK("xx_freeze, CQ=0x%x, CA=0x%x, CE=0x%x, DE=0x%x\n", + ata_port_dbg(ap, "CQ=0x%x, CA=0x%x, CE=0x%x, DE=0x%x\n", ioread32(CQ + hcr_base), ioread32(CA + hcr_base), ioread32(CE + hcr_base), ioread32(DE + hcr_base)); - VPRINTK("CmdStat = 0x%x\n", + ata_port_dbg(ap, "CmdStat = 0x%x\n", ioread32(host_priv->csr_base + COMMANDSTAT)); /* disable interrupts on the controller/port */ temp = ioread32(hcr_base + HCONTROL); iowrite32((temp & ~0x3F), hcr_base + HCONTROL); - VPRINTK("in xx_freeze : HControl = 0x%x, HStatus = 0x%x\n", + ata_port_dbg(ap, "HControl = 0x%x, HStatus = 0x%x\n", ioread32(hcr_base + HCONTROL), ioread32(hcr_base + HSTATUS)); } @@ -676,7 +660,7 @@ static void sata_fsl_thaw(struct ata_port *ap) /* ack. any pending IRQs for this controller/port */ temp = ioread32(hcr_base + HSTATUS); - VPRINTK("xx_thaw, pending IRQs = 0x%x\n", (temp & 0x3F)); + ata_port_dbg(ap, "pending IRQs = 0x%x\n", (temp & 0x3F)); if (temp & 0x3F) iowrite32((temp & 0x3F), hcr_base + HSTATUS); @@ -685,7 +669,7 @@ static void sata_fsl_thaw(struct ata_port *ap) temp = ioread32(hcr_base + HCONTROL); iowrite32((temp | DEFAULT_PORT_IRQ_ENABLE_MASK), hcr_base + HCONTROL); - VPRINTK("xx_thaw : HControl = 0x%x, HStatus = 0x%x\n", + ata_port_dbg(ap, "HControl = 0x%x, HStatus = 0x%x\n", ioread32(hcr_base + HCONTROL), ioread32(hcr_base + HSTATUS)); } @@ -747,8 +731,9 @@ static int sata_fsl_port_start(struct ata_port *ap) ap->private_data = pp; - VPRINTK("CHBA = 0x%x, cmdentry_phys = 0x%x\n", - pp->cmdslot_paddr, pp->cmdentry_paddr); + ata_port_dbg(ap, "CHBA = 0x%lx, cmdentry_phys = 0x%lx\n", + (unsigned long)pp->cmdslot_paddr, + (unsigned long)pp->cmdentry_paddr); /* Now, update the CHBA register in host controller cmd register set */ iowrite32(pp->cmdslot_paddr & 0xffffffff, hcr_base + CHBA); @@ -764,9 +749,9 @@ static int sata_fsl_port_start(struct ata_port *ap) temp = ioread32(hcr_base + HCONTROL); iowrite32((temp | HCONTROL_ONLINE_PHY_RST), hcr_base + HCONTROL); - VPRINTK("HStatus = 0x%x\n", ioread32(hcr_base + HSTATUS)); - VPRINTK("HControl = 0x%x\n", ioread32(hcr_base + HCONTROL)); - VPRINTK("CHBA = 0x%x\n", ioread32(hcr_base + CHBA)); + ata_port_dbg(ap, "HStatus = 0x%x\n", ioread32(hcr_base + HSTATUS)); + ata_port_dbg(ap, "HControl = 0x%x\n", ioread32(hcr_base + HCONTROL)); + ata_port_dbg(ap, "CHBA = 0x%x\n", ioread32(hcr_base + CHBA)); return 0; } @@ -806,9 +791,8 @@ static unsigned int sata_fsl_dev_classify(struct ata_port *ap) temp = ioread32(hcr_base + SIGNATURE); - VPRINTK("raw sig = 0x%x\n", temp); - VPRINTK("HStatus = 0x%x\n", ioread32(hcr_base + HSTATUS)); - VPRINTK("HControl = 0x%x\n", ioread32(hcr_base + HCONTROL)); + ata_port_dbg(ap, "HStatus = 0x%x\n", ioread32(hcr_base + HSTATUS)); + ata_port_dbg(ap, "HControl = 0x%x\n", ioread32(hcr_base + HCONTROL)); tf.lbah = (temp >> 24) & 0xff; tf.lbam = (temp >> 16) & 0xff; @@ -961,7 +945,7 @@ static int sata_fsl_softreset(struct ata_link *link, unsigned int *class, cfis = (u8 *) &pp->cmdentry->cfis; /* device reset/SRST is a control register update FIS, uses tag0 */ - sata_fsl_setup_cmd_hdr_entry(pp, 0, + sata_fsl_setup_cmd_hdr_entry(ap, pp, 0, SRST_CMD | CMD_DESC_RES | CMD_DESC_SNOOP_ENABLE, 0, 0, 5); tf.ctl |= ATA_SRST; /* setup SRST bit in taskfile control reg */ @@ -1011,8 +995,9 @@ static int sata_fsl_softreset(struct ata_link *link, unsigned int *class, * using ATA signature D2H register FIS to the host controller. */ - sata_fsl_setup_cmd_hdr_entry(pp, 0, CMD_DESC_RES | CMD_DESC_SNOOP_ENABLE, - 0, 0, 5); + sata_fsl_setup_cmd_hdr_entry(ap, pp, 0, + CMD_DESC_RES | CMD_DESC_SNOOP_ENABLE, + 0, 0, 5); tf.ctl &= ~ATA_SRST; /* 2nd H2D Ctl. register FIS */ ata_tf_to_fis(&tf, pmp, 0, cfis); @@ -1042,8 +1027,8 @@ static int sata_fsl_softreset(struct ata_link *link, unsigned int *class, *class = sata_fsl_dev_classify(ap); - VPRINTK("ccreg = 0x%x\n", ioread32(hcr_base + CC)); - VPRINTK("cereg = 0x%x\n", ioread32(hcr_base + CE)); + ata_port_dbg(ap, "ccreg = 0x%x\n", ioread32(hcr_base + CC)); + ata_port_dbg(ap, "cereg = 0x%x\n", ioread32(hcr_base + CE)); } return 0; @@ -1248,8 +1233,8 @@ static void sata_fsl_host_intr(struct ata_port *ap) return; } - VPRINTK("Status of all queues :\n"); - VPRINTK("done_mask/CC = 0x%x, CA = 0x%x, CE=0x%x,CQ=0x%x,apqa=0x%llx\n", + ata_port_dbg(ap, "Status of all queues :\n"); + ata_port_dbg(ap, "done_mask/CC = 0x%x, CA = 0x%x, CE=0x%x,CQ=0x%x,apqa=0x%llx\n", done_mask, ioread32(hcr_base + CA), ioread32(hcr_base + CE), @@ -1467,7 +1452,8 @@ static int sata_fsl_probe(struct platform_device *ofdev) iowrite32(temp | TRANSCFG_RX_WATER_MARK, csr_base + TRANSCFG); } - ata_port_dbg(ap, "@reset i/o = 0x%x\n", ioread32(csr_base + TRANSCFG)); + dev_dbg(&ofdev->dev, "@reset i/o = 0x%x\n", + ioread32(csr_base + TRANSCFG)); host_priv = kzalloc(sizeof(struct sata_fsl_host_priv), GFP_KERNEL); if (!host_priv) From 0b8e9cc71c237105340a40afc2a387e9ceffb595 Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Tue, 21 Dec 2021 08:20:58 +0100 Subject: [PATCH 258/493] ata: sata_sil: Drop pointless VPRINTK() calls Signed-off-by: Hannes Reinecke Signed-off-by: Damien Le Moal --- drivers/ata/sata_sil.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/ata/sata_sil.c b/drivers/ata/sata_sil.c index 75321f1ceba5..3b989a52879d 100644 --- a/drivers/ata/sata_sil.c +++ b/drivers/ata/sata_sil.c @@ -307,7 +307,6 @@ static void sil_fill_sg(struct ata_queued_cmd *qc) prd->addr = cpu_to_le32(addr); prd->flags_len = cpu_to_le32(sg_len); - VPRINTK("PRD[%u] = (0x%X, 0x%X)\n", si, addr, sg_len); last_prd = prd; prd++; From bc21c1056d08525d9c5a5d74db4b8f14e6691991 Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Tue, 21 Dec 2021 08:20:59 +0100 Subject: [PATCH 259/493] ata: sata_sx4: Drop pointless VPRINTK() calls and convert the remaining ones Drop pointless VPRINTK() calls for setting up SG tables and convert the remaining calls to structured logging. Signed-off-by: Hannes Reinecke Signed-off-by: Damien Le Moal --- drivers/ata/sata_sx4.c | 105 +++++++++++++---------------------------- 1 file changed, 34 insertions(+), 71 deletions(-) diff --git a/drivers/ata/sata_sx4.c b/drivers/ata/sata_sx4.c index 4c01190a5e37..85e72c81a1de 100644 --- a/drivers/ata/sata_sx4.c +++ b/drivers/ata/sata_sx4.c @@ -308,15 +308,9 @@ static inline void pdc20621_ata_sg(u8 *buf, unsigned int portno, /* output ATA packet S/G table */ addr = PDC_20621_DIMM_BASE + PDC_20621_DIMM_DATA + (PDC_DIMM_DATA_STEP * portno); - VPRINTK("ATA sg addr 0x%x, %d\n", addr, addr); + buf32[dw] = cpu_to_le32(addr); buf32[dw + 1] = cpu_to_le32(total_len | ATA_PRD_EOT); - - VPRINTK("ATA PSG @ %x == (0x%x, 0x%x)\n", - PDC_20621_DIMM_BASE + - (PDC_DIMM_WINDOW_STEP * portno) + - PDC_DIMM_APKT_PRD, - buf32[dw], buf32[dw + 1]); } static inline void pdc20621_host_sg(u8 *buf, unsigned int portno, @@ -332,12 +326,6 @@ static inline void pdc20621_host_sg(u8 *buf, unsigned int portno, buf32[dw] = cpu_to_le32(addr); buf32[dw + 1] = cpu_to_le32(total_len | ATA_PRD_EOT); - - VPRINTK("HOST PSG @ %x == (0x%x, 0x%x)\n", - PDC_20621_DIMM_BASE + - (PDC_DIMM_WINDOW_STEP * portno) + - PDC_DIMM_HPKT_PRD, - buf32[dw], buf32[dw + 1]); } static inline unsigned int pdc20621_ata_pkt(struct ata_taskfile *tf, @@ -351,7 +339,6 @@ static inline unsigned int pdc20621_ata_pkt(struct ata_taskfile *tf, unsigned int dimm_sg = PDC_20621_DIMM_BASE + (PDC_DIMM_WINDOW_STEP * portno) + PDC_DIMM_APKT_PRD; - VPRINTK("ENTER, dimm_sg == 0x%x, %d\n", dimm_sg, dimm_sg); i = PDC_DIMM_ATA_PKT; @@ -406,8 +393,6 @@ static inline void pdc20621_host_pkt(struct ata_taskfile *tf, u8 *buf, unsigned int dimm_sg = PDC_20621_DIMM_BASE + (PDC_DIMM_WINDOW_STEP * portno) + PDC_DIMM_HPKT_PRD; - VPRINTK("ENTER, dimm_sg == 0x%x, %d\n", dimm_sg, dimm_sg); - VPRINTK("host_sg == 0x%x, %d\n", host_sg, host_sg); dw = PDC_DIMM_HOST_PKT >> 2; @@ -424,14 +409,6 @@ static inline void pdc20621_host_pkt(struct ata_taskfile *tf, u8 *buf, buf32[dw + 1] = cpu_to_le32(host_sg); buf32[dw + 2] = cpu_to_le32(dimm_sg); buf32[dw + 3] = 0; - - VPRINTK("HOST PKT @ %x == (0x%x 0x%x 0x%x 0x%x)\n", - PDC_20621_DIMM_BASE + (PDC_DIMM_WINDOW_STEP * portno) + - PDC_DIMM_HOST_PKT, - buf32[dw + 0], - buf32[dw + 1], - buf32[dw + 2], - buf32[dw + 3]); } static void pdc20621_dma_prep(struct ata_queued_cmd *qc) @@ -447,8 +424,6 @@ static void pdc20621_dma_prep(struct ata_queued_cmd *qc) WARN_ON(!(qc->flags & ATA_QCFLAG_DMAMAP)); - VPRINTK("ata%u: ENTER\n", ap->print_id); - /* hard-code chip #0 */ mmio += PDC_CHIP0_OFS; @@ -492,7 +467,8 @@ static void pdc20621_dma_prep(struct ata_queued_cmd *qc) readl(dimm_mmio); /* MMIO PCI posting flush */ - VPRINTK("ata pkt buf ofs %u, prd size %u, mmio copied\n", i, sgt_len); + ata_port_dbg(ap, "ata pkt buf ofs %u, prd size %u, mmio copied\n", + i, sgt_len); } static void pdc20621_nodata_prep(struct ata_queued_cmd *qc) @@ -504,8 +480,6 @@ static void pdc20621_nodata_prep(struct ata_queued_cmd *qc) unsigned int portno = ap->port_no; unsigned int i; - VPRINTK("ata%u: ENTER\n", ap->print_id); - /* hard-code chip #0 */ mmio += PDC_CHIP0_OFS; @@ -527,7 +501,7 @@ static void pdc20621_nodata_prep(struct ata_queued_cmd *qc) readl(dimm_mmio); /* MMIO PCI posting flush */ - VPRINTK("ata pkt buf ofs %u, mmio copied\n", i); + ata_port_dbg(ap, "ata pkt buf ofs %u, mmio copied\n", i); } static enum ata_completion_errors pdc20621_qc_prep(struct ata_queued_cmd *qc) @@ -633,8 +607,6 @@ static void pdc20621_packet_start(struct ata_queued_cmd *qc) /* hard-code chip #0 */ mmio += PDC_CHIP0_OFS; - VPRINTK("ata%u: ENTER\n", ap->print_id); - wmb(); /* flush PRD, pkt writes */ port_ofs = PDC_20621_DIMM_BASE + (PDC_DIMM_WINDOW_STEP * port_no); @@ -645,7 +617,7 @@ static void pdc20621_packet_start(struct ata_queued_cmd *qc) pdc20621_dump_hdma(qc); pdc20621_push_hdma(qc, seq, port_ofs + PDC_DIMM_HOST_PKT); - VPRINTK("queued ofs 0x%x (%u), seq %u\n", + ata_port_dbg(ap, "queued ofs 0x%x (%u), seq %u\n", port_ofs + PDC_DIMM_HOST_PKT, port_ofs + PDC_DIMM_HOST_PKT, seq); @@ -656,7 +628,7 @@ static void pdc20621_packet_start(struct ata_queued_cmd *qc) writel(port_ofs + PDC_DIMM_ATA_PKT, ap->ioaddr.cmd_addr + PDC_PKT_SUBMIT); readl(ap->ioaddr.cmd_addr + PDC_PKT_SUBMIT); - VPRINTK("submitted ofs 0x%x (%u), seq %u\n", + ata_port_dbg(ap, "submitted ofs 0x%x (%u), seq %u\n", port_ofs + PDC_DIMM_ATA_PKT, port_ofs + PDC_DIMM_ATA_PKT, seq); @@ -696,14 +668,12 @@ static inline unsigned int pdc20621_host_intr(struct ata_port *ap, u8 status; unsigned int handled = 0; - VPRINTK("ENTER\n"); - if ((qc->tf.protocol == ATA_PROT_DMA) && /* read */ (!(qc->tf.flags & ATA_TFLAG_WRITE))) { /* step two - DMA from DIMM to host */ if (doing_hdma) { - VPRINTK("ata%u: read hdma, 0x%x 0x%x\n", ap->print_id, + ata_port_dbg(ap, "read hdma, 0x%x 0x%x\n", readl(mmio + 0x104), readl(mmio + PDC_HDMA_CTLSTAT)); /* get drive status; clear intr; complete txn */ qc->err_mask |= ac_err_mask(ata_wait_idle(ap)); @@ -714,7 +684,7 @@ static inline unsigned int pdc20621_host_intr(struct ata_port *ap, /* step one - exec ATA command */ else { u8 seq = (u8) (port_no + 1 + 4); - VPRINTK("ata%u: read ata, 0x%x 0x%x\n", ap->print_id, + ata_port_dbg(ap, "read ata, 0x%x 0x%x\n", readl(mmio + 0x104), readl(mmio + PDC_HDMA_CTLSTAT)); /* submit hdma pkt */ @@ -729,7 +699,7 @@ static inline unsigned int pdc20621_host_intr(struct ata_port *ap, /* step one - DMA from host to DIMM */ if (doing_hdma) { u8 seq = (u8) (port_no + 1); - VPRINTK("ata%u: write hdma, 0x%x 0x%x\n", ap->print_id, + ata_port_dbg(ap, "write hdma, 0x%x 0x%x\n", readl(mmio + 0x104), readl(mmio + PDC_HDMA_CTLSTAT)); /* submit ata pkt */ @@ -742,7 +712,7 @@ static inline unsigned int pdc20621_host_intr(struct ata_port *ap, /* step two - execute ATA command */ else { - VPRINTK("ata%u: write ata, 0x%x 0x%x\n", ap->print_id, + ata_port_dbg(ap, "write ata, 0x%x 0x%x\n", readl(mmio + 0x104), readl(mmio + PDC_HDMA_CTLSTAT)); /* get drive status; clear intr; complete txn */ qc->err_mask |= ac_err_mask(ata_wait_idle(ap)); @@ -755,7 +725,7 @@ static inline unsigned int pdc20621_host_intr(struct ata_port *ap, } else if (qc->tf.protocol == ATA_PROT_NODATA) { status = ata_sff_busy_wait(ap, ATA_BUSY | ATA_DRQ, 1000); - DPRINTK("BUS_NODATA (drv_stat 0x%X)\n", status); + ata_port_dbg(ap, "BUS_NODATA (drv_stat 0x%X)\n", status); qc->err_mask |= ac_err_mask(status); ata_qc_complete(qc); handled = 1; @@ -781,29 +751,21 @@ static irqreturn_t pdc20621_interrupt(int irq, void *dev_instance) unsigned int handled = 0; void __iomem *mmio_base; - VPRINTK("ENTER\n"); - - if (!host || !host->iomap[PDC_MMIO_BAR]) { - VPRINTK("QUICK EXIT\n"); + if (!host || !host->iomap[PDC_MMIO_BAR]) return IRQ_NONE; - } mmio_base = host->iomap[PDC_MMIO_BAR]; /* reading should also clear interrupts */ mmio_base += PDC_CHIP0_OFS; mask = readl(mmio_base + PDC_20621_SEQMASK); - VPRINTK("mask == 0x%x\n", mask); - if (mask == 0xffffffff) { - VPRINTK("QUICK EXIT 2\n"); + if (mask == 0xffffffff) return IRQ_NONE; - } + mask &= 0xffff; /* only 16 tags possible */ - if (!mask) { - VPRINTK("QUICK EXIT 3\n"); + if (!mask) return IRQ_NONE; - } spin_lock(&host->lock); @@ -816,7 +778,8 @@ static irqreturn_t pdc20621_interrupt(int irq, void *dev_instance) else ap = host->ports[port_no]; tmp = mask & (1 << i); - VPRINTK("seq %u, port_no %u, ap %p, tmp %x\n", i, port_no, ap, tmp); + if (ap) + ata_port_dbg(ap, "seq %u, tmp %x\n", i, tmp); if (tmp && ap) { struct ata_queued_cmd *qc; @@ -829,10 +792,6 @@ static irqreturn_t pdc20621_interrupt(int irq, void *dev_instance) spin_unlock(&host->lock); - VPRINTK("mask == 0x%x\n", mask); - - VPRINTK("EXIT\n"); - return IRQ_RETVAL(handled); } @@ -1274,7 +1233,7 @@ static unsigned int pdc20621_dimm_init(struct ata_host *host) /* Initialize Time Period Register */ writel(0xffffffff, mmio + PDC_TIME_PERIOD); time_period = readl(mmio + PDC_TIME_PERIOD); - VPRINTK("Time Period Register (0x40): 0x%x\n", time_period); + dev_dbg(host->dev, "Time Period Register (0x40): 0x%x\n", time_period); /* Enable timer */ writel(PDC_TIMER_DEFAULT, mmio + PDC_TIME_CONTROL); @@ -1289,7 +1248,7 @@ static unsigned int pdc20621_dimm_init(struct ata_host *host) */ tcount = readl(mmio + PDC_TIME_COUNTER); - VPRINTK("Time Counter Register (0x44): 0x%x\n", tcount); + dev_dbg(host->dev, "Time Counter Register (0x44): 0x%x\n", tcount); /* If SX4 is on PCI-X bus, after 3 seconds, the timer counter @@ -1297,17 +1256,19 @@ static unsigned int pdc20621_dimm_init(struct ata_host *host) */ if (tcount >= PCI_X_TCOUNT) { ticks = (time_period - tcount); - VPRINTK("Num counters 0x%x (%d)\n", ticks, ticks); + dev_dbg(host->dev, "Num counters 0x%x (%d)\n", ticks, ticks); clock = (ticks / 300000); - VPRINTK("10 * Internal clk = 0x%x (%d)\n", clock, clock); + dev_dbg(host->dev, "10 * Internal clk = 0x%x (%d)\n", + clock, clock); clock = (clock * 33); - VPRINTK("10 * Internal clk * 33 = 0x%x (%d)\n", clock, clock); + dev_dbg(host->dev, "10 * Internal clk * 33 = 0x%x (%d)\n", + clock, clock); /* PLL F Param (bit 22:16) */ fparam = (1400000 / clock) - 2; - VPRINTK("PLL F Param: 0x%x (%d)\n", fparam, fparam); + dev_dbg(host->dev, "PLL F Param: 0x%x (%d)\n", fparam, fparam); /* OD param = 0x2 (bit 31:30), R param = 0x5 (bit 29:25) */ pci_status = (0x8a001824 | (fparam << 16)); @@ -1315,7 +1276,7 @@ static unsigned int pdc20621_dimm_init(struct ata_host *host) pci_status = PCI_PLL_INIT; /* Initialize PLL. */ - VPRINTK("pci_status: 0x%x\n", pci_status); + dev_dbg(host->dev, "pci_status: 0x%x\n", pci_status); writel(pci_status, mmio + PDC_CTL_STATUS); readl(mmio + PDC_CTL_STATUS); @@ -1327,15 +1288,16 @@ static unsigned int pdc20621_dimm_init(struct ata_host *host) printk(KERN_ERR "Detect Local DIMM Fail\n"); return 1; /* DIMM error */ } - VPRINTK("Local DIMM Speed = %d\n", speed); + dev_dbg(host->dev, "Local DIMM Speed = %d\n", speed); /* Programming DIMM0 Module Control Register (index_CID0:80h) */ size = pdc20621_prog_dimm0(host); - VPRINTK("Local DIMM Size = %dMB\n", size); + dev_dbg(host->dev, "Local DIMM Size = %dMB\n", size); /* Programming DIMM Module Global Control Register (index_CID0:88h) */ if (pdc20621_prog_dimm_global(host)) { - printk(KERN_ERR "Programming DIMM Module Global Control Register Fail\n"); + dev_err(host->dev, + "Programming DIMM Module Global Control Register Fail\n"); return 1; } @@ -1372,13 +1334,14 @@ static unsigned int pdc20621_dimm_init(struct ata_host *host) if (!pdc20621_i2c_read(host, PDC_DIMM0_SPD_DEV_ADDRESS, PDC_DIMM_SPD_TYPE, &spd0)) { - pr_err("Failed in i2c read: device=%#x, subaddr=%#x\n", + dev_err(host->dev, + "Failed in i2c read: device=%#x, subaddr=%#x\n", PDC_DIMM0_SPD_DEV_ADDRESS, PDC_DIMM_SPD_TYPE); return 1; } if (spd0 == 0x02) { void *buf; - VPRINTK("Start ECC initialization\n"); + dev_dbg(host->dev, "Start ECC initialization\n"); addr = 0; length = size * 1024 * 1024; buf = kzalloc(ECC_ERASE_BUF_SZ, GFP_KERNEL); @@ -1390,7 +1353,7 @@ static unsigned int pdc20621_dimm_init(struct ata_host *host) addr += ECC_ERASE_BUF_SZ; } kfree(buf); - VPRINTK("Finish ECC initialization\n"); + dev_dbg(host->dev, "Finish ECC initialization\n"); } return 0; } From f11c5403a1f0c5dc4cf8c38f14b26dc9abe8cf75 Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Tue, 21 Dec 2021 08:21:00 +0100 Subject: [PATCH 260/493] ata: sata_sx4: add module parameter 'dimm_test' Add module parameter 'dimm_test' to enable DIMM testing during startup. Signed-off-by: Hannes Reinecke Signed-off-by: Damien Le Moal --- drivers/ata/sata_sx4.c | 34 ++++++++++++++-------------------- 1 file changed, 14 insertions(+), 20 deletions(-) diff --git a/drivers/ata/sata_sx4.c b/drivers/ata/sata_sx4.c index 85e72c81a1de..5d7913644dfc 100644 --- a/drivers/ata/sata_sx4.c +++ b/drivers/ata/sata_sx4.c @@ -78,6 +78,9 @@ #define DRV_NAME "sata_sx4" #define DRV_VERSION "0.12" +static int dimm_test; +module_param(dimm_test, int, 0644); +MODULE_PARM_DESC(dimm_test, "Enable DIMM test during startup (1 = enabled)"); enum { PDC_MMIO_BAR = 3, @@ -211,10 +214,8 @@ static unsigned int pdc20621_i2c_read(struct ata_host *host, u32 device, u32 subaddr, u32 *pdata); static int pdc20621_prog_dimm0(struct ata_host *host); static unsigned int pdc20621_prog_dimm_global(struct ata_host *host); -#ifdef ATA_VERBOSE_DEBUG static void pdc20621_get_from_dimm(struct ata_host *host, void *psource, u32 offset, u32 size); -#endif static void pdc20621_put_to_dimm(struct ata_host *host, void *psource, u32 offset, u32 size); static void pdc20621_irq_clear(struct ata_port *ap); @@ -575,7 +576,6 @@ static void pdc20621_pop_hdma(struct ata_queued_cmd *qc) pp->hdma_cons++; } -#ifdef ATA_VERBOSE_DEBUG static void pdc20621_dump_hdma(struct ata_queued_cmd *qc) { struct ata_port *ap = qc->ap; @@ -585,14 +585,10 @@ static void pdc20621_dump_hdma(struct ata_queued_cmd *qc) dimm_mmio += (port_no * PDC_DIMM_WINDOW_STEP); dimm_mmio += PDC_DIMM_HOST_PKT; - printk(KERN_ERR "HDMA[0] == 0x%08X\n", readl(dimm_mmio)); - printk(KERN_ERR "HDMA[1] == 0x%08X\n", readl(dimm_mmio + 4)); - printk(KERN_ERR "HDMA[2] == 0x%08X\n", readl(dimm_mmio + 8)); - printk(KERN_ERR "HDMA[3] == 0x%08X\n", readl(dimm_mmio + 12)); + ata_port_dbg(ap, "HDMA 0x%08X 0x%08X 0x%08X 0x%08X\n", + readl(dimm_mmio), readl(dimm_mmio + 4), + readl(dimm_mmio + 8), readl(dimm_mmio + 12)); } -#else -static inline void pdc20621_dump_hdma(struct ata_queued_cmd *qc) { } -#endif /* ATA_VERBOSE_DEBUG */ static void pdc20621_packet_start(struct ata_queued_cmd *qc) { @@ -938,7 +934,6 @@ static void pdc_sata_setup_port(struct ata_ioports *port, void __iomem *base) } -#ifdef ATA_VERBOSE_DEBUG static void pdc20621_get_from_dimm(struct ata_host *host, void *psource, u32 offset, u32 size) { @@ -988,7 +983,6 @@ static void pdc20621_get_from_dimm(struct ata_host *host, void *psource, memcpy_fromio(psource, dimm_mmio, size / 4); } } -#endif static void pdc20621_put_to_dimm(struct ata_host *host, void *psource, @@ -1301,8 +1295,7 @@ static unsigned int pdc20621_dimm_init(struct ata_host *host) return 1; } -#ifdef ATA_VERBOSE_DEBUG - { + if (dimm_test) { u8 test_parttern1[40] = {0x55,0xAA,'P','r','o','m','i','s','e',' ', 'N','o','t',' ','Y','e','t',' ', @@ -1316,19 +1309,20 @@ static unsigned int pdc20621_dimm_init(struct ata_host *host) pdc20621_put_to_dimm(host, test_parttern1, 0x10040, 40); pdc20621_get_from_dimm(host, test_parttern2, 0x40, 40); - printk(KERN_ERR "%x, %x, %s\n", test_parttern2[0], + dev_info(host->dev, "DIMM test pattern 1: %x, %x, %s\n", test_parttern2[0], test_parttern2[1], &(test_parttern2[2])); pdc20621_get_from_dimm(host, test_parttern2, 0x10040, 40); - printk(KERN_ERR "%x, %x, %s\n", test_parttern2[0], - test_parttern2[1], &(test_parttern2[2])); + dev_info(host->dev, "DIMM test pattern 2: %x, %x, %s\n", + test_parttern2[0], + test_parttern2[1], &(test_parttern2[2])); pdc20621_put_to_dimm(host, test_parttern1, 0x40, 40); pdc20621_get_from_dimm(host, test_parttern2, 0x40, 40); - printk(KERN_ERR "%x, %x, %s\n", test_parttern2[0], - test_parttern2[1], &(test_parttern2[2])); + dev_info(host->dev, "DIMM test pattern 3: %x, %x, %s\n", + test_parttern2[0], + test_parttern2[1], &(test_parttern2[2])); } -#endif /* ECC initiliazation. */ From d97c75edd806669c9f4b56c0ddae37725c0b708c Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Tue, 21 Dec 2021 08:21:01 +0100 Subject: [PATCH 261/493] ata: libata: drop ata_msg_error() and ata_msg_intr() Unused. Signed-off-by: Hannes Reinecke Signed-off-by: Damien Le Moal --- drivers/ata/libata-core.c | 6 +++--- include/linux/libata.h | 4 ---- 2 files changed, 3 insertions(+), 7 deletions(-) diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c index 447a46bdc820..165210576288 100644 --- a/drivers/ata/libata-core.c +++ b/drivers/ata/libata-core.c @@ -5345,11 +5345,11 @@ struct ata_port *ata_port_alloc(struct ata_host *host) #if defined(ATA_VERBOSE_DEBUG) /* turn on all debugging levels */ - ap->msg_enable = 0x00FF; + ap->msg_enable = 0x003F; #elif defined(ATA_DEBUG) - ap->msg_enable = ATA_MSG_DRV | ATA_MSG_INFO | ATA_MSG_CTL | ATA_MSG_WARN | ATA_MSG_ERR; + ap->msg_enable = ATA_MSG_DRV | ATA_MSG_INFO | ATA_MSG_CTL | ATA_MSG_WARN; #else - ap->msg_enable = ATA_MSG_DRV | ATA_MSG_ERR | ATA_MSG_WARN; + ap->msg_enable = ATA_MSG_DRV | ATA_MSG_WARN; #endif mutex_init(&ap->scsi_scan_mutex); diff --git a/include/linux/libata.h b/include/linux/libata.h index 39cdde0b9491..4f0a85f4e69a 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -78,8 +78,6 @@ enum { ATA_MSG_WARN = 0x0008, ATA_MSG_MALLOC = 0x0010, ATA_MSG_CTL = 0x0020, - ATA_MSG_INTR = 0x0040, - ATA_MSG_ERR = 0x0080, }; #define ata_msg_drv(p) ((p)->msg_enable & ATA_MSG_DRV) @@ -88,8 +86,6 @@ enum { #define ata_msg_warn(p) ((p)->msg_enable & ATA_MSG_WARN) #define ata_msg_malloc(p) ((p)->msg_enable & ATA_MSG_MALLOC) #define ata_msg_ctl(p) ((p)->msg_enable & ATA_MSG_CTL) -#define ata_msg_intr(p) ((p)->msg_enable & ATA_MSG_INTR) -#define ata_msg_err(p) ((p)->msg_enable & ATA_MSG_ERR) static inline u32 ata_msg_init(int dval, int default_msg_enable_bits) { From 5cef96b4207e01c9cdb7752acaa178056fe94632 Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Tue, 21 Dec 2021 08:21:02 +0100 Subject: [PATCH 262/493] ata: libata: drop ata_msg_ctl() The one caller have been converted to dynamic debugging. Signed-off-by: Hannes Reinecke Signed-off-by: Damien Le Moal --- drivers/ata/libata-core.c | 7 ++----- include/linux/libata.h | 2 -- 2 files changed, 2 insertions(+), 7 deletions(-) diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c index 165210576288..5b50a6d0d6eb 100644 --- a/drivers/ata/libata-core.c +++ b/drivers/ata/libata-core.c @@ -1763,9 +1763,6 @@ int ata_dev_read_id(struct ata_device *dev, unsigned int *p_class, int may_fallback = 1, tried_spinup = 0; int rc; - if (ata_msg_ctl(ap)) - ata_dev_dbg(dev, "%s: ENTER\n", __func__); - retry: ata_tf_init(dev, &tf); @@ -5345,9 +5342,9 @@ struct ata_port *ata_port_alloc(struct ata_host *host) #if defined(ATA_VERBOSE_DEBUG) /* turn on all debugging levels */ - ap->msg_enable = 0x003F; + ap->msg_enable = 0x001F; #elif defined(ATA_DEBUG) - ap->msg_enable = ATA_MSG_DRV | ATA_MSG_INFO | ATA_MSG_CTL | ATA_MSG_WARN; + ap->msg_enable = ATA_MSG_DRV | ATA_MSG_INFO | ATA_MSG_WARN; #else ap->msg_enable = ATA_MSG_DRV | ATA_MSG_WARN; #endif diff --git a/include/linux/libata.h b/include/linux/libata.h index 4f0a85f4e69a..e384cce62963 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -77,7 +77,6 @@ enum { ATA_MSG_PROBE = 0x0004, ATA_MSG_WARN = 0x0008, ATA_MSG_MALLOC = 0x0010, - ATA_MSG_CTL = 0x0020, }; #define ata_msg_drv(p) ((p)->msg_enable & ATA_MSG_DRV) @@ -85,7 +84,6 @@ enum { #define ata_msg_probe(p) ((p)->msg_enable & ATA_MSG_PROBE) #define ata_msg_warn(p) ((p)->msg_enable & ATA_MSG_WARN) #define ata_msg_malloc(p) ((p)->msg_enable & ATA_MSG_MALLOC) -#define ata_msg_ctl(p) ((p)->msg_enable & ATA_MSG_CTL) static inline u32 ata_msg_init(int dval, int default_msg_enable_bits) { From 2f784b923d50cdef1f6bd24d7c18614321b0833a Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Tue, 21 Dec 2021 08:21:03 +0100 Subject: [PATCH 263/493] ata: libata: drop ata_msg_malloc() Unused. Signed-off-by: Hannes Reinecke Signed-off-by: Damien Le Moal --- drivers/ata/libata-core.c | 2 +- include/linux/libata.h | 2 -- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c index 5b50a6d0d6eb..3accab132492 100644 --- a/drivers/ata/libata-core.c +++ b/drivers/ata/libata-core.c @@ -5342,7 +5342,7 @@ struct ata_port *ata_port_alloc(struct ata_host *host) #if defined(ATA_VERBOSE_DEBUG) /* turn on all debugging levels */ - ap->msg_enable = 0x001F; + ap->msg_enable = 0x000F; #elif defined(ATA_DEBUG) ap->msg_enable = ATA_MSG_DRV | ATA_MSG_INFO | ATA_MSG_WARN; #else diff --git a/include/linux/libata.h b/include/linux/libata.h index e384cce62963..5651bbf4902b 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -76,14 +76,12 @@ enum { ATA_MSG_INFO = 0x0002, ATA_MSG_PROBE = 0x0004, ATA_MSG_WARN = 0x0008, - ATA_MSG_MALLOC = 0x0010, }; #define ata_msg_drv(p) ((p)->msg_enable & ATA_MSG_DRV) #define ata_msg_info(p) ((p)->msg_enable & ATA_MSG_INFO) #define ata_msg_probe(p) ((p)->msg_enable & ATA_MSG_PROBE) #define ata_msg_warn(p) ((p)->msg_enable & ATA_MSG_WARN) -#define ata_msg_malloc(p) ((p)->msg_enable & ATA_MSG_MALLOC) static inline u32 ata_msg_init(int dval, int default_msg_enable_bits) { From 16d424672716dc886fb58ec4a47a408db4781cc0 Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Tue, 21 Dec 2021 08:21:04 +0100 Subject: [PATCH 264/493] ata: libata: drop ata_msg_warn() The WARN level was always enabled, so drop ata_msg_warn(). Signed-off-by: Hannes Reinecke Signed-off-by: Damien Le Moal --- drivers/ata/libata-core.c | 19 ++++++++----------- include/linux/libata.h | 2 -- 2 files changed, 8 insertions(+), 13 deletions(-) diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c index 3accab132492..b5334e0a8603 100644 --- a/drivers/ata/libata-core.c +++ b/drivers/ata/libata-core.c @@ -1571,9 +1571,8 @@ unsigned ata_exec_internal_sg(struct ata_device *dev, else ata_qc_complete(qc); - if (ata_msg_warn(ap)) - ata_dev_warn(dev, "qc timeout (cmd 0x%x)\n", - command); + ata_dev_warn(dev, "qc timeout (cmd 0x%x)\n", + command); } spin_unlock_irqrestore(ap->lock, flags); @@ -1932,9 +1931,8 @@ retry: return 0; err_out: - if (ata_msg_warn(ap)) - ata_dev_warn(dev, "failed to IDENTIFY (%s, err_mask=0x%x)\n", - reason, err_mask); + ata_dev_warn(dev, "failed to IDENTIFY (%s, err_mask=0x%x)\n", + reason, err_mask); return rc; } @@ -2683,8 +2681,7 @@ int ata_dev_configure(struct ata_device *dev) rc = atapi_cdb_len(id); if ((rc < 12) || (rc > ATAPI_CDB_LEN)) { - if (ata_msg_warn(ap)) - ata_dev_warn(dev, "unsupported CDB len\n"); + ata_dev_warn(dev, "unsupported CDB len %d\n", rc); rc = -EINVAL; goto err_out_nosup; } @@ -5342,11 +5339,11 @@ struct ata_port *ata_port_alloc(struct ata_host *host) #if defined(ATA_VERBOSE_DEBUG) /* turn on all debugging levels */ - ap->msg_enable = 0x000F; + ap->msg_enable = 0x0007; #elif defined(ATA_DEBUG) - ap->msg_enable = ATA_MSG_DRV | ATA_MSG_INFO | ATA_MSG_WARN; + ap->msg_enable = ATA_MSG_DRV | ATA_MSG_INFO; #else - ap->msg_enable = ATA_MSG_DRV | ATA_MSG_WARN; + ap->msg_enable = ATA_MSG_DRV; #endif mutex_init(&ap->scsi_scan_mutex); diff --git a/include/linux/libata.h b/include/linux/libata.h index 5651bbf4902b..0e5ed2ff94be 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -75,13 +75,11 @@ enum { ATA_MSG_DRV = 0x0001, ATA_MSG_INFO = 0x0002, ATA_MSG_PROBE = 0x0004, - ATA_MSG_WARN = 0x0008, }; #define ata_msg_drv(p) ((p)->msg_enable & ATA_MSG_DRV) #define ata_msg_info(p) ((p)->msg_enable & ATA_MSG_INFO) #define ata_msg_probe(p) ((p)->msg_enable & ATA_MSG_PROBE) -#define ata_msg_warn(p) ((p)->msg_enable & ATA_MSG_WARN) static inline u32 ata_msg_init(int dval, int default_msg_enable_bits) { From 17a1e1be2fc7dc99945b41df0485037dcb6044d0 Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Tue, 21 Dec 2021 08:21:05 +0100 Subject: [PATCH 265/493] ata: libata: drop ata_msg_probe() All callsites have been converted to dynamic debugging. Signed-off-by: Hannes Reinecke Signed-off-by: Damien Le Moal --- drivers/ata/libata-acpi.c | 23 +++++++---------------- drivers/ata/libata-core.c | 20 +++++++------------- drivers/ata/libata-sff.c | 4 ---- include/linux/libata.h | 2 -- 4 files changed, 14 insertions(+), 35 deletions(-) diff --git a/drivers/ata/libata-acpi.c b/drivers/ata/libata-acpi.c index 9e1e62b9cf63..8cfa8c96bb13 100644 --- a/drivers/ata/libata-acpi.c +++ b/drivers/ata/libata-acpi.c @@ -402,7 +402,6 @@ EXPORT_SYMBOL_GPL(ata_acpi_stm); */ static int ata_dev_get_GTF(struct ata_device *dev, struct ata_acpi_gtf **gtf) { - struct ata_port *ap = dev->link->ap; acpi_status status; struct acpi_buffer output; union acpi_object *out_obj; @@ -418,10 +417,6 @@ static int ata_dev_get_GTF(struct ata_device *dev, struct ata_acpi_gtf **gtf) output.length = ACPI_ALLOCATE_BUFFER; output.pointer = NULL; /* ACPI-CA sets this; save/free it later */ - if (ata_msg_probe(ap)) - ata_dev_dbg(dev, "%s: ENTER: port#: %d\n", - __func__, ap->port_no); - /* _GTF has no input parameters */ status = acpi_evaluate_object(ata_dev_acpi_handle(dev), "_GTF", NULL, &output); @@ -437,11 +432,9 @@ static int ata_dev_get_GTF(struct ata_device *dev, struct ata_acpi_gtf **gtf) } if (!output.length || !output.pointer) { - if (ata_msg_probe(ap)) - ata_dev_dbg(dev, "%s: Run _GTF: length or ptr is NULL (0x%llx, 0x%p)\n", - __func__, - (unsigned long long)output.length, - output.pointer); + ata_dev_dbg(dev, "Run _GTF: length or ptr is NULL (0x%llx, 0x%p)\n", + (unsigned long long)output.length, + output.pointer); rc = -EINVAL; goto out_free; } @@ -464,9 +457,8 @@ static int ata_dev_get_GTF(struct ata_device *dev, struct ata_acpi_gtf **gtf) rc = out_obj->buffer.length / REGS_PER_GTF; if (gtf) { *gtf = (void *)out_obj->buffer.pointer; - if (ata_msg_probe(ap)) - ata_dev_dbg(dev, "%s: returning gtf=%p, gtf_count=%d\n", - __func__, *gtf, rc); + ata_dev_dbg(dev, "returning gtf=%p, gtf_count=%d\n", + *gtf, rc); } return rc; @@ -778,9 +770,8 @@ static int ata_acpi_push_id(struct ata_device *dev) struct acpi_object_list input; union acpi_object in_params[1]; - if (ata_msg_probe(ap)) - ata_dev_dbg(dev, "%s: ix = %d, port#: %d\n", - __func__, dev->devno, ap->port_no); + ata_dev_dbg(dev, "%s: ix = %d, port#: %d\n", + __func__, dev->devno, ap->port_no); /* Give the drive Identify data to the drive via the _SDD method */ /* _SDD: set up input parameters */ diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c index b5334e0a8603..623a6f272c7e 100644 --- a/drivers/ata/libata-core.c +++ b/drivers/ata/libata-core.c @@ -2535,9 +2535,6 @@ int ata_dev_configure(struct ata_device *dev) return 0; } - if (ata_msg_probe(ap)) - ata_dev_dbg(dev, "%s: ENTER\n", __func__); - /* set horkage */ dev->horkage |= ata_dev_blacklisted(dev); ata_force_horkage(dev); @@ -2585,13 +2582,12 @@ int ata_dev_configure(struct ata_device *dev) return rc; /* print device capabilities */ - if (ata_msg_probe(ap)) - ata_dev_dbg(dev, - "%s: cfg 49:%04x 82:%04x 83:%04x 84:%04x " - "85:%04x 86:%04x 87:%04x 88:%04x\n", - __func__, - id[49], id[82], id[83], id[84], - id[85], id[86], id[87], id[88]); + ata_dev_dbg(dev, + "%s: cfg 49:%04x 82:%04x 83:%04x 84:%04x " + "85:%04x 86:%04x 87:%04x 88:%04x\n", + __func__, + id[49], id[82], id[83], id[84], + id[85], id[86], id[87], id[88]); /* initialize to-be-configured parameters */ dev->flags &= ~ATA_DFLAG_CFG_MASK; @@ -2791,8 +2787,6 @@ int ata_dev_configure(struct ata_device *dev) return 0; err_out_nosup: - if (ata_msg_probe(ap)) - ata_dev_dbg(dev, "%s: EXIT, err\n", __func__); return rc; } @@ -5339,7 +5333,7 @@ struct ata_port *ata_port_alloc(struct ata_host *host) #if defined(ATA_VERBOSE_DEBUG) /* turn on all debugging levels */ - ap->msg_enable = 0x0007; + ap->msg_enable = 0x0003; #elif defined(ATA_DEBUG) ap->msg_enable = ATA_MSG_DRV | ATA_MSG_INFO; #else diff --git a/drivers/ata/libata-sff.c b/drivers/ata/libata-sff.c index d5dbeb68b2bf..01f1673f3297 100644 --- a/drivers/ata/libata-sff.c +++ b/drivers/ata/libata-sff.c @@ -330,10 +330,6 @@ EXPORT_SYMBOL_GPL(ata_sff_dev_select); static void ata_dev_select(struct ata_port *ap, unsigned int device, unsigned int wait, unsigned int can_sleep) { - if (ata_msg_probe(ap)) - ata_port_info(ap, "ata_dev_select: ENTER, device %u, wait %u\n", - device, wait); - if (wait) ata_wait_idle(ap); diff --git a/include/linux/libata.h b/include/linux/libata.h index 0e5ed2ff94be..455d7e77e562 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -74,12 +74,10 @@ enum { ATA_MSG_DRV = 0x0001, ATA_MSG_INFO = 0x0002, - ATA_MSG_PROBE = 0x0004, }; #define ata_msg_drv(p) ((p)->msg_enable & ATA_MSG_DRV) #define ata_msg_info(p) ((p)->msg_enable & ATA_MSG_INFO) -#define ata_msg_probe(p) ((p)->msg_enable & ATA_MSG_PROBE) static inline u32 ata_msg_init(int dval, int default_msg_enable_bits) { From 96c810f216cb6da15bfa8fe8ef3bf73ca91c5dd8 Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Tue, 21 Dec 2021 08:21:06 +0100 Subject: [PATCH 266/493] ata: libata: drop ata_msg_info() Convert the sole caller to ata_dev_dbg() and remove the definition. Signed-off-by: Hannes Reinecke Signed-off-by: Damien Le Moal --- drivers/ata/libata-core.c | 10 +++------- include/linux/libata.h | 2 -- 2 files changed, 3 insertions(+), 9 deletions(-) diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c index 623a6f272c7e..80ca94eb3ce0 100644 --- a/drivers/ata/libata-core.c +++ b/drivers/ata/libata-core.c @@ -2530,8 +2530,8 @@ int ata_dev_configure(struct ata_device *dev) char modelbuf[ATA_ID_PROD_LEN+1]; int rc; - if (!ata_dev_enabled(dev) && ata_msg_info(ap)) { - ata_dev_info(dev, "%s: ENTER/EXIT -- nodev\n", __func__); + if (!ata_dev_enabled(dev)) { + ata_dev_dbg(dev, "no device\n"); return 0; } @@ -5333,11 +5333,7 @@ struct ata_port *ata_port_alloc(struct ata_host *host) #if defined(ATA_VERBOSE_DEBUG) /* turn on all debugging levels */ - ap->msg_enable = 0x0003; -#elif defined(ATA_DEBUG) - ap->msg_enable = ATA_MSG_DRV | ATA_MSG_INFO; -#else - ap->msg_enable = ATA_MSG_DRV; + ap->msg_enable = 0x0001; #endif mutex_init(&ap->scsi_scan_mutex); diff --git a/include/linux/libata.h b/include/linux/libata.h index 455d7e77e562..524d09b1dc82 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -73,11 +73,9 @@ enum { ATA_MSG_DRV = 0x0001, - ATA_MSG_INFO = 0x0002, }; #define ata_msg_drv(p) ((p)->msg_enable & ATA_MSG_DRV) -#define ata_msg_info(p) ((p)->msg_enable & ATA_MSG_INFO) static inline u32 ata_msg_init(int dval, int default_msg_enable_bits) { From 1c95a27c1e544f723f6e0e5a4384098f92996ec0 Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Tue, 21 Dec 2021 08:21:07 +0100 Subject: [PATCH 267/493] ata: libata: drop ata_msg_drv() Callers are already protected by ata_dev_print_info(), so no need to have an additional configuration parameter here. Signed-off-by: Hannes Reinecke Signed-off-by: Damien Le Moal --- drivers/ata/libata-core.c | 19 ++++++------------- drivers/ata/libata-eh.c | 3 +-- include/linux/libata.h | 6 ------ 3 files changed, 7 insertions(+), 21 deletions(-) diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c index 80ca94eb3ce0..9c2947905d1e 100644 --- a/drivers/ata/libata-core.c +++ b/drivers/ata/libata-core.c @@ -2354,7 +2354,6 @@ static void ata_dev_config_trusted(struct ata_device *dev) static int ata_dev_config_lba(struct ata_device *dev) { - struct ata_port *ap = dev->link->ap; const u16 *id = dev->id; const char *lba_desc; char ncq_desc[24]; @@ -2376,7 +2375,7 @@ static int ata_dev_config_lba(struct ata_device *dev) ret = ata_dev_config_ncq(dev, ncq_desc, sizeof(ncq_desc)); /* print device info to dmesg */ - if (ata_msg_drv(ap) && ata_dev_print_info(dev)) + if (ata_dev_print_info(dev)) ata_dev_info(dev, "%llu sectors, multi %u: %s %s\n", (unsigned long long)dev->n_sectors, @@ -2387,7 +2386,6 @@ static int ata_dev_config_lba(struct ata_device *dev) static void ata_dev_config_chs(struct ata_device *dev) { - struct ata_port *ap = dev->link->ap; const u16 *id = dev->id; if (ata_id_current_chs_valid(id)) { @@ -2403,7 +2401,7 @@ static void ata_dev_config_chs(struct ata_device *dev) } /* print device info to dmesg */ - if (ata_msg_drv(ap) && ata_dev_print_info(dev)) + if (ata_dev_print_info(dev)) ata_dev_info(dev, "%llu sectors, multi %u, CHS %u/%u/%u\n", (unsigned long long)dev->n_sectors, @@ -2644,7 +2642,7 @@ int ata_dev_configure(struct ata_device *dev) } /* print device info to dmesg */ - if (ata_msg_drv(ap) && print_info) + if (print_info) ata_dev_info(dev, "%s: %s, %s, max %s\n", revbuf, modelbuf, fwrevbuf, ata_mode_string(xfer_mask)); @@ -2664,7 +2662,7 @@ int ata_dev_configure(struct ata_device *dev) ata_dev_config_cpr(dev); dev->cdb_len = 32; - if (ata_msg_drv(ap) && print_info) + if (print_info) ata_dev_print_features(dev); } @@ -2721,7 +2719,7 @@ int ata_dev_configure(struct ata_device *dev) } /* print device info to dmesg */ - if (ata_msg_drv(ap) && print_info) + if (print_info) ata_dev_info(dev, "ATAPI: %s, %s, max %s%s%s%s\n", modelbuf, fwrevbuf, @@ -2738,7 +2736,7 @@ int ata_dev_configure(struct ata_device *dev) /* Limit PATA drive on SATA cable bridge transfers to udma5, 200 sectors */ if (ata_dev_knobble(dev)) { - if (ata_msg_drv(ap) && print_info) + if (print_info) ata_dev_info(dev, "applying bridge limits\n"); dev->udma_mask &= ATA_UDMA5; dev->max_sectors = ATA_MAX_SECTORS; @@ -5331,11 +5329,6 @@ struct ata_port *ata_port_alloc(struct ata_host *host) ap->host = host; ap->dev = host->dev; -#if defined(ATA_VERBOSE_DEBUG) - /* turn on all debugging levels */ - ap->msg_enable = 0x0001; -#endif - mutex_init(&ap->scsi_scan_mutex); INIT_DELAYED_WORK(&ap->hotplug_task, ata_scsi_hotplug); INIT_WORK(&ap->scsi_rescan_task, ata_scsi_dev_rescan); diff --git a/drivers/ata/libata-eh.c b/drivers/ata/libata-eh.c index 8bf52a6239aa..7951fd946bf9 100644 --- a/drivers/ata/libata-eh.c +++ b/drivers/ata/libata-eh.c @@ -1214,8 +1214,7 @@ void ata_dev_disable(struct ata_device *dev) if (!ata_dev_enabled(dev)) return; - if (ata_msg_drv(dev->link->ap)) - ata_dev_warn(dev, "disabled\n"); + ata_dev_warn(dev, "disable device\n"); ata_acpi_on_disable(dev); ata_down_xfermask_limit(dev, ATA_DNXFER_FORCE_PIO0 | ATA_DNXFER_QUIET); dev->class++; diff --git a/include/linux/libata.h b/include/linux/libata.h index 524d09b1dc82..65172609a005 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -71,12 +71,6 @@ /* NEW: debug levels */ #define HAVE_LIBATA_MSG 1 -enum { - ATA_MSG_DRV = 0x0001, -}; - -#define ata_msg_drv(p) ((p)->msg_enable & ATA_MSG_DRV) - static inline u32 ata_msg_init(int dval, int default_msg_enable_bits) { if (dval < 0 || dval >= (sizeof(u32) * 8)) From db45905e74e6ae035305719bc683eca40f526669 Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Tue, 21 Dec 2021 08:21:08 +0100 Subject: [PATCH 268/493] ata: libata: remove 'new' ata message handling Remove the remaining bits for the 'new' ata message handling. Signed-off-by: Hannes Reinecke Signed-off-by: Damien Le Moal --- include/linux/libata.h | 13 ------------- 1 file changed, 13 deletions(-) diff --git a/include/linux/libata.h b/include/linux/libata.h index 65172609a005..145c0132b75e 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -68,18 +68,6 @@ } \ }) -/* NEW: debug levels */ -#define HAVE_LIBATA_MSG 1 - -static inline u32 ata_msg_init(int dval, int default_msg_enable_bits) -{ - if (dval < 0 || dval >= (sizeof(u32) * 8)) - return default_msg_enable_bits; /* should be 0x1 - only driver info msgs */ - if (!dval) - return 0; - return (1 << dval) - 1; -} - /* defines only for the constants which don't work well as enums */ #define ATA_TAG_POISON 0xfafbfcfdU @@ -864,7 +852,6 @@ struct ata_port { unsigned int hsm_task_state; - u32 msg_enable; struct list_head eh_done_q; wait_queue_head_t eh_wait_q; int eh_tries; From 870bb833c0acb29d8471eac5c2d2e6274826dbb6 Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Tue, 21 Dec 2021 08:21:09 +0100 Subject: [PATCH 269/493] ata: libata: remove debug compilation switches Unused now, so remove and drop any references to them. Signed-off-by: Hannes Reinecke Signed-off-by: Damien Le Moal --- drivers/ata/libata-sff.c | 1 - drivers/ata/pata_ep93xx.c | 1 - drivers/ata/sata_rcar.c | 1 - include/linux/libata.h | 16 ---------------- 4 files changed, 19 deletions(-) diff --git a/drivers/ata/libata-sff.c b/drivers/ata/libata-sff.c index 01f1673f3297..75217828dfe3 100644 --- a/drivers/ata/libata-sff.c +++ b/drivers/ata/libata-sff.c @@ -2091,7 +2091,6 @@ void ata_sff_drain_fifo(struct ata_queued_cmd *qc) && count < 65536; count += 2) ioread16(ap->ioaddr.data_addr); - /* Can become DEBUG later */ if (count) ata_port_dbg(ap, "drained %d bytes to clear DRQ\n", count); diff --git a/drivers/ata/pata_ep93xx.c b/drivers/ata/pata_ep93xx.c index 46208ececbb6..b78f71c70f27 100644 --- a/drivers/ata/pata_ep93xx.c +++ b/drivers/ata/pata_ep93xx.c @@ -855,7 +855,6 @@ static void ep93xx_pata_drain_fifo(struct ata_queued_cmd *qc) && count < 65536; count += 2) ep93xx_pata_read_reg(drv_data, IDECTRL_ADDR_DATA); - /* Can become DEBUG later */ if (count) ata_port_dbg(ap, "drained %d bytes to clear DRQ.\n", count); diff --git a/drivers/ata/sata_rcar.c b/drivers/ata/sata_rcar.c index 91b39a6aa9f7..3d96b6faa3f0 100644 --- a/drivers/ata/sata_rcar.c +++ b/drivers/ata/sata_rcar.c @@ -479,7 +479,6 @@ static void sata_rcar_drain_fifo(struct ata_queued_cmd *qc) count < 65536; count += 2) ioread32(ap->ioaddr.data_addr); - /* Can become DEBUG later */ if (count) ata_port_dbg(ap, "drained %d bytes to clear DRQ\n", count); } diff --git a/include/linux/libata.h b/include/linux/libata.h index 145c0132b75e..c258f69106f4 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -39,25 +39,9 @@ * compile-time options: to be removed as soon as all the drivers are * converted to the new debugging mechanism */ -#undef ATA_DEBUG /* debugging output */ -#undef ATA_VERBOSE_DEBUG /* yet more debugging output */ #undef ATA_IRQ_TRAP /* define to ack screaming irqs */ -#undef ATA_NDEBUG /* define to disable quick runtime checks */ -/* note: prints function name for you */ -#ifdef ATA_DEBUG -#define DPRINTK(fmt, args...) printk(KERN_ERR "%s: " fmt, __func__, ## args) -#ifdef ATA_VERBOSE_DEBUG -#define VPRINTK(fmt, args...) printk(KERN_ERR "%s: " fmt, __func__, ## args) -#else -#define VPRINTK(fmt, args...) -#endif /* ATA_VERBOSE_DEBUG */ -#else -#define DPRINTK(fmt, args...) -#define VPRINTK(fmt, args...) -#endif /* ATA_DEBUG */ - #define ata_print_version_once(dev, version) \ ({ \ static bool __print_once; \ From f2f01a52f28121770c5cd48352a60b87e1fa204b Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Tue, 21 Dec 2021 08:21:10 +0100 Subject: [PATCH 270/493] ata: pata_atp867x: convert printk() calls Convert printk() calls to structured logging. [Damien] Fix ata_port_dbg() format in atp867x_check_ports() to avoid compile warnings with 32-bits arch. Signed-off-by: Hannes Reinecke Signed-off-by: Damien Le Moal --- drivers/ata/pata_atp867x.c | 105 ++++++++++++++++++------------------- 1 file changed, 50 insertions(+), 55 deletions(-) diff --git a/drivers/ata/pata_atp867x.c b/drivers/ata/pata_atp867x.c index 2bc5fc81efe3..779d660415c8 100644 --- a/drivers/ata/pata_atp867x.c +++ b/drivers/ata/pata_atp867x.c @@ -155,7 +155,7 @@ static int atp867x_get_active_clocks_shifted(struct ata_port *ap, case 1 ... 6: break; default: - printk(KERN_WARNING "ATP867X: active %dclk is invalid. " + ata_port_warn(ap, "ATP867X: active %dclk is invalid. " "Using 12clk.\n", clk); fallthrough; case 9 ... 12: @@ -171,7 +171,8 @@ active_clock_shift_done: return clocks << ATP867X_IO_PIOSPD_ACTIVE_SHIFT; } -static int atp867x_get_recover_clocks_shifted(unsigned int clk) +static int atp867x_get_recover_clocks_shifted(struct ata_port *ap, + unsigned int clk) { unsigned char clocks = clk; @@ -188,7 +189,7 @@ static int atp867x_get_recover_clocks_shifted(unsigned int clk) case 15: break; default: - printk(KERN_WARNING "ATP867X: recover %dclk is invalid. " + ata_port_warn(ap, "ATP867X: recover %dclk is invalid. " "Using default 12clk.\n", clk); fallthrough; case 12: /* default 12 clk */ @@ -225,7 +226,7 @@ static void atp867x_set_piomode(struct ata_port *ap, struct ata_device *adev) iowrite8(b, dp->dma_mode); b = atp867x_get_active_clocks_shifted(ap, t.active) | - atp867x_get_recover_clocks_shifted(t.recover); + atp867x_get_recover_clocks_shifted(ap, t.recover); if (adev->devno & 1) iowrite8(b, dp->slave_piospd); @@ -233,7 +234,7 @@ static void atp867x_set_piomode(struct ata_port *ap, struct ata_device *adev) iowrite8(b, dp->mstr_piospd); b = atp867x_get_active_clocks_shifted(ap, t.act8b) | - atp867x_get_recover_clocks_shifted(t.rec8b); + atp867x_get_recover_clocks_shifted(ap, t.rec8b); iowrite8(b, dp->eightb_piospd); } @@ -270,7 +271,6 @@ static struct ata_port_operations atp867x_ops = { }; -#ifdef ATP867X_DEBUG static void atp867x_check_res(struct pci_dev *pdev) { int i; @@ -280,7 +280,7 @@ static void atp867x_check_res(struct pci_dev *pdev) for (i = 0; i < DEVICE_COUNT_RESOURCE; i++) { start = pci_resource_start(pdev, i); len = pci_resource_len(pdev, i); - printk(KERN_DEBUG "ATP867X: resource start:len=%lx:%lx\n", + dev_dbg(&pdev->dev, "ATP867X: resource start:len=%lx:%lx\n", start, len); } } @@ -290,49 +290,48 @@ static void atp867x_check_ports(struct ata_port *ap, int port) struct ata_ioports *ioaddr = &ap->ioaddr; struct atp867x_priv *dp = ap->private_data; - printk(KERN_DEBUG "ATP867X: port[%d] addresses\n" - " cmd_addr =0x%llx, 0x%llx\n" - " ctl_addr =0x%llx, 0x%llx\n" - " bmdma_addr =0x%llx, 0x%llx\n" - " data_addr =0x%llx\n" - " error_addr =0x%llx\n" - " feature_addr =0x%llx\n" - " nsect_addr =0x%llx\n" - " lbal_addr =0x%llx\n" - " lbam_addr =0x%llx\n" - " lbah_addr =0x%llx\n" - " device_addr =0x%llx\n" - " status_addr =0x%llx\n" - " command_addr =0x%llx\n" - " dp->dma_mode =0x%llx\n" - " dp->mstr_piospd =0x%llx\n" - " dp->slave_piospd =0x%llx\n" - " dp->eightb_piospd =0x%llx\n" + ata_port_dbg(ap, "ATP867X: port[%d] addresses\n" + " cmd_addr =0x%lx, 0x%lx\n" + " ctl_addr =0x%lx, 0x%lx\n" + " bmdma_addr =0x%lx, 0x%lx\n" + " data_addr =0x%lx\n" + " error_addr =0x%lx\n" + " feature_addr =0x%lx\n" + " nsect_addr =0x%lx\n" + " lbal_addr =0x%lx\n" + " lbam_addr =0x%lx\n" + " lbah_addr =0x%lx\n" + " device_addr =0x%lx\n" + " status_addr =0x%lx\n" + " command_addr =0x%lx\n" + " dp->dma_mode =0x%lx\n" + " dp->mstr_piospd =0x%lx\n" + " dp->slave_piospd =0x%lx\n" + " dp->eightb_piospd =0x%lx\n" " dp->pci66mhz =0x%lx\n", port, - (unsigned long long)ioaddr->cmd_addr, - (unsigned long long)ATP867X_IO_PORTBASE(ap, port), - (unsigned long long)ioaddr->ctl_addr, - (unsigned long long)ATP867X_IO_ALTSTATUS(ap, port), - (unsigned long long)ioaddr->bmdma_addr, - (unsigned long long)ATP867X_IO_DMABASE(ap, port), - (unsigned long long)ioaddr->data_addr, - (unsigned long long)ioaddr->error_addr, - (unsigned long long)ioaddr->feature_addr, - (unsigned long long)ioaddr->nsect_addr, - (unsigned long long)ioaddr->lbal_addr, - (unsigned long long)ioaddr->lbam_addr, - (unsigned long long)ioaddr->lbah_addr, - (unsigned long long)ioaddr->device_addr, - (unsigned long long)ioaddr->status_addr, - (unsigned long long)ioaddr->command_addr, - (unsigned long long)dp->dma_mode, - (unsigned long long)dp->mstr_piospd, - (unsigned long long)dp->slave_piospd, - (unsigned long long)dp->eightb_piospd, + (unsigned long)ioaddr->cmd_addr, + (unsigned long)ATP867X_IO_PORTBASE(ap, port), + (unsigned long)ioaddr->ctl_addr, + (unsigned long)ATP867X_IO_ALTSTATUS(ap, port), + (unsigned long)ioaddr->bmdma_addr, + (unsigned long)ATP867X_IO_DMABASE(ap, port), + (unsigned long)ioaddr->data_addr, + (unsigned long)ioaddr->error_addr, + (unsigned long)ioaddr->feature_addr, + (unsigned long)ioaddr->nsect_addr, + (unsigned long)ioaddr->lbal_addr, + (unsigned long)ioaddr->lbam_addr, + (unsigned long)ioaddr->lbah_addr, + (unsigned long)ioaddr->device_addr, + (unsigned long)ioaddr->status_addr, + (unsigned long)ioaddr->command_addr, + (unsigned long)dp->dma_mode, + (unsigned long)dp->mstr_piospd, + (unsigned long)dp->slave_piospd, + (unsigned long)dp->eightb_piospd, (unsigned long)dp->pci66mhz); } -#endif static int atp867x_set_priv(struct ata_port *ap) { @@ -370,8 +369,7 @@ static void atp867x_fixup(struct ata_host *host) if (v < 0x80) { v = 0x80; pci_write_config_byte(pdev, PCI_LATENCY_TIMER, v); - printk(KERN_DEBUG "ATP867X: set latency timer of device %s" - " to %d\n", pci_name(pdev), v); + dev_dbg(&pdev->dev, "ATP867X: set latency timer to %d\n", v); } /* @@ -419,13 +417,11 @@ static int atp867x_ata_pci_sff_init_host(struct ata_host *host) return rc; host->iomap = pcim_iomap_table(pdev); -#ifdef ATP867X_DEBUG atp867x_check_res(pdev); for (i = 0; i < PCI_STD_NUM_BARS; i++) - printk(KERN_DEBUG "ATP867X: iomap[%d]=0x%llx\n", i, - (unsigned long long)(host->iomap[i])); -#endif + dev_dbg(gdev, "ATP867X: iomap[%d]=0x%p\n", i, + host->iomap[i]); /* * request, iomap BARs and init port addresses accordingly @@ -444,9 +440,8 @@ static int atp867x_ata_pci_sff_init_host(struct ata_host *host) if (rc) return rc; -#ifdef ATP867X_DEBUG atp867x_check_ports(ap, i); -#endif + ata_port_desc(ap, "cmd 0x%lx ctl 0x%lx", (unsigned long)ioaddr->cmd_addr, (unsigned long)ioaddr->ctl_addr); @@ -486,7 +481,7 @@ static int atp867x_init_one(struct pci_dev *pdev, if (rc) return rc; - printk(KERN_INFO "ATP867X: ATP867 ATA UDMA133 controller (rev %02X)", + dev_info(&pdev->dev, "ATP867X: ATP867 ATA UDMA133 controller (rev %02X)", pdev->device); host = ata_host_alloc_pinfo(&pdev->dev, ppi, ATP867X_NUM_PORTS); From 0f1c1294c78d1510490e466e167a668dfc0ac5ae Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Tue, 21 Dec 2021 08:21:11 +0100 Subject: [PATCH 271/493] ata: pata_cmd640: convert printk() calls Convert printk() calls to structured logging. Signed-off-by: Hannes Reinecke Signed-off-by: Damien Le Moal --- drivers/ata/pata_cmd640.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/ata/pata_cmd640.c b/drivers/ata/pata_cmd640.c index d0bcabb58b44..1a3372a72213 100644 --- a/drivers/ata/pata_cmd640.c +++ b/drivers/ata/pata_cmd640.c @@ -61,7 +61,7 @@ static void cmd640_set_piomode(struct ata_port *ap, struct ata_device *adev) struct ata_device *pair = ata_dev_pair(adev); if (ata_timing_compute(adev, adev->pio_mode, &t, T, 0) < 0) { - printk(KERN_ERR DRV_NAME ": mode computation failed.\n"); + ata_dev_err(adev, DRV_NAME ": mode computation failed.\n"); return; } From 8705cb7f1b49e03b721ff1891331263dad83f875 Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Tue, 21 Dec 2021 08:21:12 +0100 Subject: [PATCH 272/493] ata: pata_cmd64x: convert printk() calls Convert printk() calls to structured logging. Signed-off-by: Hannes Reinecke Signed-off-by: Damien Le Moal --- drivers/ata/pata_cmd64x.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/ata/pata_cmd64x.c b/drivers/ata/pata_cmd64x.c index 1d74d89b5bed..5baa4a7819c1 100644 --- a/drivers/ata/pata_cmd64x.c +++ b/drivers/ata/pata_cmd64x.c @@ -116,7 +116,7 @@ static void cmd64x_set_timing(struct ata_port *ap, struct ata_device *adev, u8 m /* ata_timing_compute is smart and will produce timings for MWDMA that don't violate the drives PIO capabilities. */ if (ata_timing_compute(adev, mode, &t, T, 0) < 0) { - printk(KERN_ERR DRV_NAME ": mode computation failed.\n"); + ata_dev_err(adev, DRV_NAME ": mode computation failed.\n"); return; } if (ap->port_no) { @@ -130,7 +130,7 @@ static void cmd64x_set_timing(struct ata_port *ap, struct ata_device *adev, u8 m } } - printk(KERN_DEBUG DRV_NAME ": active %d recovery %d setup %d.\n", + ata_dev_dbg(adev, DRV_NAME ": active %d recovery %d setup %d.\n", t.active, t.recover, t.setup); if (t.recover > 16) { t.active += t.recover - 16; From 56f7979e770b21b1b420b82ddd83a1b4a301fdb5 Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Tue, 21 Dec 2021 08:21:13 +0100 Subject: [PATCH 273/493] ata: pata_cs5520: convert printk() calls Convert printk() calls to structured logging. Signed-off-by: Hannes Reinecke Signed-off-by: Damien Le Moal --- drivers/ata/pata_cs5520.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/ata/pata_cs5520.c b/drivers/ata/pata_cs5520.c index 247c14702624..24ce8665b1f9 100644 --- a/drivers/ata/pata_cs5520.c +++ b/drivers/ata/pata_cs5520.c @@ -153,12 +153,12 @@ static int cs5520_init_one(struct pci_dev *pdev, const struct pci_device_id *id) /* Perform set up for DMA */ if (pci_enable_device_io(pdev)) { - printk(KERN_ERR DRV_NAME ": unable to configure BAR2.\n"); + dev_err(&pdev->dev, "unable to configure BAR2.\n"); return -ENODEV; } if (dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32))) { - printk(KERN_ERR DRV_NAME ": unable to configure DMA mask.\n"); + dev_err(&pdev->dev, "unable to configure DMA mask.\n"); return -ENODEV; } From 0d43bff5196d2d2c00055470281a11ccfafa740f Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Tue, 21 Dec 2021 08:21:14 +0100 Subject: [PATCH 274/493] ata: pata_cs5536: convert printk() calls Convert printk() calls to structured logging. Signed-off-by: Hannes Reinecke Signed-off-by: Damien Le Moal --- drivers/ata/pata_cs5536.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/ata/pata_cs5536.c b/drivers/ata/pata_cs5536.c index 760ac6e65216..ab47aeb5587f 100644 --- a/drivers/ata/pata_cs5536.c +++ b/drivers/ata/pata_cs5536.c @@ -263,12 +263,12 @@ static int cs5536_init_one(struct pci_dev *dev, const struct pci_device_id *id) ppi[1] = &ata_dummy_port_info; if (use_msr) - printk(KERN_ERR DRV_NAME ": Using MSR regs instead of PCI\n"); + dev_err(&dev->dev, DRV_NAME ": Using MSR regs instead of PCI\n"); cs5536_read(dev, CFG, &cfg); if ((cfg & IDE_CFG_CHANEN) == 0) { - printk(KERN_ERR DRV_NAME ": disabled by BIOS\n"); + dev_err(&dev->dev, DRV_NAME ": disabled by BIOS\n"); return -ENODEV; } From 3dede7f9b37fbf7e0471e1d14f8eff540fcc87ea Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Tue, 21 Dec 2021 08:21:15 +0100 Subject: [PATCH 275/493] ata: pata_cypress: convert printk() calls Convert printk() calls to structured logging. Signed-off-by: Hannes Reinecke Signed-off-by: Damien Le Moal --- drivers/ata/pata_cypress.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/ata/pata_cypress.c b/drivers/ata/pata_cypress.c index 5b3a7a8ebef6..3be5d52a777b 100644 --- a/drivers/ata/pata_cypress.c +++ b/drivers/ata/pata_cypress.c @@ -62,7 +62,7 @@ static void cy82c693_set_piomode(struct ata_port *ap, struct ata_device *adev) u32 addr; if (ata_timing_compute(adev, adev->pio_mode, &t, T, 1) < 0) { - printk(KERN_ERR DRV_NAME ": mome computation failed.\n"); + ata_dev_err(adev, DRV_NAME ": mome computation failed.\n"); return; } From 3697aaafc368b66e5d76b749d2b0275a03ce6af1 Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Tue, 21 Dec 2021 08:21:16 +0100 Subject: [PATCH 276/493] ata: pata_it821x: convert printk() calls Convert printk() calls to structured logging. Signed-off-by: Hannes Reinecke Signed-off-by: Damien Le Moal --- drivers/ata/pata_it821x.c | 43 ++++++++++++++++++++++----------------- 1 file changed, 24 insertions(+), 19 deletions(-) diff --git a/drivers/ata/pata_it821x.c b/drivers/ata/pata_it821x.c index 0e2265978a34..b77ef0046dbe 100644 --- a/drivers/ata/pata_it821x.c +++ b/drivers/ata/pata_it821x.c @@ -431,7 +431,8 @@ static unsigned int it821x_smart_qc_issue(struct ata_queued_cmd *qc) case ATA_CMD_SET_FEATURES: return ata_bmdma_qc_issue(qc); } - printk(KERN_DEBUG "it821x: can't process command 0x%02X\n", qc->tf.command); + ata_dev_dbg(qc->dev, "it821x: can't process command 0x%02X\n", + qc->tf.command); return AC_ERR_DEV; } @@ -507,12 +508,14 @@ static void it821x_dev_config(struct ata_device *adev) if (strstr(model_num, "Integrated Technology Express")) { /* RAID mode */ - ata_dev_info(adev, "%sRAID%d volume", - adev->id[147] ? "Bootable " : "", - adev->id[129]); - if (adev->id[129] != 1) - pr_cont("(%dK stripe)", adev->id[146]); - pr_cont("\n"); + if (adev->id[129] == 1) + ata_dev_info(adev, "%sRAID%d volume\n", + adev->id[147] ? "Bootable " : "", + adev->id[129]); + else + ata_dev_info(adev, "%sRAID%d volume (%dK stripe)\n", + adev->id[147] ? "Bootable " : "", + adev->id[129], adev->id[146]); } /* This is a controller firmware triggered funny, don't report the drive faulty! */ @@ -593,6 +596,7 @@ static int it821x_check_atapi_dma(struct ata_queued_cmd *qc) /** * it821x_display_disk - display disk setup + * @ap: ATA port * @n: Device number * @buf: Buffer block from firmware * @@ -600,7 +604,7 @@ static int it821x_check_atapi_dma(struct ata_queued_cmd *qc) * by the firmware. */ -static void it821x_display_disk(int n, u8 *buf) +static void it821x_display_disk(struct ata_port *ap, int n, u8 *buf) { unsigned char id[41]; int mode = 0; @@ -633,13 +637,13 @@ static void it821x_display_disk(int n, u8 *buf) else strcpy(mbuf, "PIO"); if (buf[52] == 4) - printk(KERN_INFO "%d: %-6s %-8s %s %s\n", + ata_port_info(ap, "%d: %-6s %-8s %s %s\n", n, mbuf, types[buf[52]], id, cbl); else - printk(KERN_INFO "%d: %-6s %-8s Volume: %1d %s %s\n", + ata_port_info(ap, "%d: %-6s %-8s Volume: %1d %s %s\n", n, mbuf, types[buf[52]], buf[53], id, cbl); if (buf[125] < 100) - printk(KERN_INFO "%d: Rebuilding: %d%%\n", n, buf[125]); + ata_port_info(ap, "%d: Rebuilding: %d%%\n", n, buf[125]); } /** @@ -676,7 +680,7 @@ static u8 *it821x_firmware_command(struct ata_port *ap, u8 cmd, int len) status = ioread8(ap->ioaddr.status_addr); if (status & ATA_ERR) { kfree(buf); - printk(KERN_ERR "it821x_firmware_command: rejected\n"); + ata_port_err(ap, "%s: rejected\n", __func__); return NULL; } if (status & ATA_DRQ) { @@ -686,7 +690,7 @@ static u8 *it821x_firmware_command(struct ata_port *ap, u8 cmd, int len) usleep_range(500, 1000); } kfree(buf); - printk(KERN_ERR "it821x_firmware_command: timeout\n"); + ata_port_err(ap, "%s: timeout\n", __func__); return NULL; } @@ -709,13 +713,13 @@ static void it821x_probe_firmware(struct ata_port *ap) buf = it821x_firmware_command(ap, 0xFA, 512); if (buf != NULL) { - printk(KERN_INFO "pata_it821x: Firmware %02X/%02X/%02X%02X\n", + ata_port_info(ap, "pata_it821x: Firmware %02X/%02X/%02X%02X\n", buf[505], buf[506], buf[507], buf[508]); for (i = 0; i < 4; i++) - it821x_display_disk(i, buf + 128 * i); + it821x_display_disk(ap, i, buf + 128 * i); kfree(buf); } } @@ -771,7 +775,8 @@ static int it821x_port_start(struct ata_port *ap) itdev->timing10 = 1; /* Need to disable ATAPI DMA for this case */ if (!itdev->smart) - printk(KERN_WARNING DRV_NAME": Revision 0x10, workarounds activated.\n"); + dev_warn(&pdev->dev, + "Revision 0x10, workarounds activated.\n"); } return 0; @@ -919,14 +924,14 @@ static int it821x_init_one(struct pci_dev *pdev, const struct pci_device_id *id) } else { /* Force the card into bypass mode if so requested */ if (it8212_noraid) { - printk(KERN_INFO DRV_NAME ": forcing bypass mode.\n"); + dev_info(&pdev->dev, "forcing bypass mode.\n"); it821x_disable_raid(pdev); } pci_read_config_byte(pdev, 0x50, &conf); conf &= 1; - printk(KERN_INFO DRV_NAME": controller in %s mode.\n", - mode[conf]); + dev_info(&pdev->dev, "controller in %s mode.\n", mode[conf]); + if (conf == 0) ppi[0] = &info_passthru; else From 21f0e60a925ba76ad2ff0c2cd9fbead1fd2cbca0 Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Tue, 21 Dec 2021 08:21:17 +0100 Subject: [PATCH 277/493] ata: pata_marvell: convert printk() calls Convert the printk() call to structured logging and drop the pointless PCI bar debug messages. Signed-off-by: Hannes Reinecke Signed-off-by: Damien Le Moal --- drivers/ata/pata_marvell.c | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/drivers/ata/pata_marvell.c b/drivers/ata/pata_marvell.c index 361597d14c56..0c5a51970fbf 100644 --- a/drivers/ata/pata_marvell.c +++ b/drivers/ata/pata_marvell.c @@ -32,7 +32,6 @@ static int marvell_pata_active(struct pci_dev *pdev) { - int i; u32 devices; void __iomem *barp; @@ -44,11 +43,6 @@ static int marvell_pata_active(struct pci_dev *pdev) if (barp == NULL) return -ENOMEM; - printk("BAR5:"); - for(i = 0; i <= 0x0F; i++) - printk("%02X:%02X ", i, ioread8(barp + i)); - printk("\n"); - devices = ioread32(barp + 0x0C); pci_iounmap(pdev, barp); @@ -149,7 +143,8 @@ static int marvell_init_one (struct pci_dev *pdev, const struct pci_device_id *i #if IS_ENABLED(CONFIG_SATA_AHCI) if (!marvell_pata_active(pdev)) { - printk(KERN_INFO DRV_NAME ": PATA port not active, deferring to AHCI driver.\n"); + dev_info(&pdev->dev, + "PATA port not active, deferring to AHCI driver.\n"); return -ENODEV; } #endif From 71306ae27c8716f92852dd18978fc5171c26849f Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Tue, 21 Dec 2021 08:21:18 +0100 Subject: [PATCH 278/493] ata: pata_rz1000: convert printk() calls Convert printk() calls to structured logging. Signed-off-by: Hannes Reinecke Signed-off-by: Damien Le Moal --- drivers/ata/pata_rz1000.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/ata/pata_rz1000.c b/drivers/ata/pata_rz1000.c index 3722a67083fd..fb00c3e5fd19 100644 --- a/drivers/ata/pata_rz1000.c +++ b/drivers/ata/pata_rz1000.c @@ -69,7 +69,7 @@ static int rz1000_fifo_disable(struct pci_dev *pdev) reg &= 0xDFFF; if (pci_write_config_word(pdev, 0x40, reg) != 0) return -1; - printk(KERN_INFO DRV_NAME ": disabled chipset readahead.\n"); + dev_info(&pdev->dev, "disabled chipset readahead.\n"); return 0; } @@ -97,7 +97,7 @@ static int rz1000_init_one (struct pci_dev *pdev, const struct pci_device_id *en if (rz1000_fifo_disable(pdev) == 0) return ata_pci_sff_init_one(pdev, ppi, &rz1000_sht, NULL, 0); - printk(KERN_ERR DRV_NAME ": failed to disable read-ahead on chipset..\n"); + dev_err(&pdev->dev, "failed to disable read-ahead on chipset.\n"); /* Not safe to use so skip */ return -ENODEV; } From f9bcf5ba2d5f83a694312814fe2e1573891cd054 Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Tue, 21 Dec 2021 08:21:19 +0100 Subject: [PATCH 279/493] ata: pata_serverworks: convert printk() calls Convert printk() calls to structured logging. Signed-off-by: Hannes Reinecke Signed-off-by: Damien Le Moal --- drivers/ata/pata_serverworks.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/ata/pata_serverworks.c b/drivers/ata/pata_serverworks.c index b602e303fb54..e410fe44177f 100644 --- a/drivers/ata/pata_serverworks.c +++ b/drivers/ata/pata_serverworks.c @@ -286,13 +286,13 @@ static int serverworks_fixup_osb4(struct pci_dev *pdev) pci_read_config_dword(isa_dev, 0x64, ®); reg &= ~0x00002000; /* disable 600ns interrupt mask */ if (!(reg & 0x00004000)) - printk(KERN_DEBUG DRV_NAME ": UDMA not BIOS enabled.\n"); + dev_info(&pdev->dev, "UDMA not BIOS enabled.\n"); reg |= 0x00004000; /* enable UDMA/33 support */ pci_write_config_dword(isa_dev, 0x64, reg); pci_dev_put(isa_dev); return 0; } - printk(KERN_WARNING DRV_NAME ": Unable to find bridge.\n"); + dev_warn(&pdev->dev, "Unable to find bridge.\n"); return -ENODEV; } From 3156234b61036a6db5f229c47f2ad8052962949a Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Tue, 21 Dec 2021 08:21:20 +0100 Subject: [PATCH 280/493] ata: pata_sil680: convert printk() calls Convert printk() calls to structured logging. Signed-off-by: Hannes Reinecke Signed-off-by: Damien Le Moal --- drivers/ata/pata_sil680.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/ata/pata_sil680.c b/drivers/ata/pata_sil680.c index 81238e097fe2..0da58ce20d82 100644 --- a/drivers/ata/pata_sil680.c +++ b/drivers/ata/pata_sil680.c @@ -308,17 +308,17 @@ static u8 sil680_init_chip(struct pci_dev *pdev, int *try_mmio) switch (tmpbyte & 0x30) { case 0x00: - printk(KERN_INFO "sil680: 100MHz clock.\n"); + dev_info(&pdev->dev, "sil680: 100MHz clock.\n"); break; case 0x10: - printk(KERN_INFO "sil680: 133MHz clock.\n"); + dev_info(&pdev->dev, "sil680: 133MHz clock.\n"); break; case 0x20: - printk(KERN_INFO "sil680: Using PCI clock.\n"); + dev_info(&pdev->dev, "sil680: Using PCI clock.\n"); break; /* This last case is _NOT_ ok */ case 0x30: - printk(KERN_ERR "sil680: Clock disabled ?\n"); + dev_err(&pdev->dev, "sil680: Clock disabled ?\n"); } return tmpbyte & 0x30; } From 16d6623fe958b7fffb35ef4e0120385497295685 Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Tue, 21 Dec 2021 08:21:21 +0100 Subject: [PATCH 281/493] ata: sata_sx4: convert printk() calls Convert printk() calls to structured logging. Signed-off-by: Hannes Reinecke Signed-off-by: Damien Le Moal --- drivers/ata/sata_sx4.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/ata/sata_sx4.c b/drivers/ata/sata_sx4.c index 5d7913644dfc..6ceec59cb291 100644 --- a/drivers/ata/sata_sx4.c +++ b/drivers/ata/sata_sx4.c @@ -1179,15 +1179,16 @@ static unsigned int pdc20621_prog_dimm_global(struct ata_host *host) /* Turn on for ECC */ if (!pdc20621_i2c_read(host, PDC_DIMM0_SPD_DEV_ADDRESS, PDC_DIMM_SPD_TYPE, &spd0)) { - pr_err("Failed in i2c read: device=%#x, subaddr=%#x\n", - PDC_DIMM0_SPD_DEV_ADDRESS, PDC_DIMM_SPD_TYPE); + dev_err(host->dev, + "Failed in i2c read: device=%#x, subaddr=%#x\n", + PDC_DIMM0_SPD_DEV_ADDRESS, PDC_DIMM_SPD_TYPE); return 1; } if (spd0 == 0x02) { data |= (0x01 << 16); writel(data, mmio + PDC_SDRAM_CONTROL); readl(mmio + PDC_SDRAM_CONTROL); - printk(KERN_ERR "Local DIMM ECC Enabled\n"); + dev_err(host->dev, "Local DIMM ECC Enabled\n"); } /* DIMM Initialization Select/Enable (bit 18/19) */ @@ -1279,7 +1280,7 @@ static unsigned int pdc20621_dimm_init(struct ata_host *host) and program the DIMM Module Controller. */ if (!(speed = pdc20621_detect_dimm(host))) { - printk(KERN_ERR "Detect Local DIMM Fail\n"); + dev_err(host->dev, "Detect Local DIMM Fail\n"); return 1; /* DIMM error */ } dev_dbg(host->dev, "Local DIMM Speed = %d\n", speed); From f76ba003d1b6ac81a8532e49878659c66a361664 Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Tue, 21 Dec 2021 08:21:22 +0100 Subject: [PATCH 282/493] ata: sata_mv: convert remaining printk() to structured logging Refactor the .reset_hc() callback and convert the remaining printk() calls to structured logging. Signed-off-by: Hannes Reinecke Signed-off-by: Damien Le Moal --- drivers/ata/sata_mv.c | 30 ++++++++++++++++-------------- 1 file changed, 16 insertions(+), 14 deletions(-) diff --git a/drivers/ata/sata_mv.c b/drivers/ata/sata_mv.c index 70743cd50a97..53446b997740 100644 --- a/drivers/ata/sata_mv.c +++ b/drivers/ata/sata_mv.c @@ -579,7 +579,7 @@ struct mv_hw_ops { void (*enable_leds)(struct mv_host_priv *hpriv, void __iomem *mmio); void (*read_preamp)(struct mv_host_priv *hpriv, int idx, void __iomem *mmio); - int (*reset_hc)(struct mv_host_priv *hpriv, void __iomem *mmio, + int (*reset_hc)(struct ata_host *host, void __iomem *mmio, unsigned int n_hc); void (*reset_flash)(struct mv_host_priv *hpriv, void __iomem *mmio); void (*reset_bus)(struct ata_host *host, void __iomem *mmio); @@ -606,7 +606,7 @@ static void mv5_phy_errata(struct mv_host_priv *hpriv, void __iomem *mmio, static void mv5_enable_leds(struct mv_host_priv *hpriv, void __iomem *mmio); static void mv5_read_preamp(struct mv_host_priv *hpriv, int idx, void __iomem *mmio); -static int mv5_reset_hc(struct mv_host_priv *hpriv, void __iomem *mmio, +static int mv5_reset_hc(struct ata_host *host, void __iomem *mmio, unsigned int n_hc); static void mv5_reset_flash(struct mv_host_priv *hpriv, void __iomem *mmio); static void mv5_reset_bus(struct ata_host *host, void __iomem *mmio); @@ -616,14 +616,14 @@ static void mv6_phy_errata(struct mv_host_priv *hpriv, void __iomem *mmio, static void mv6_enable_leds(struct mv_host_priv *hpriv, void __iomem *mmio); static void mv6_read_preamp(struct mv_host_priv *hpriv, int idx, void __iomem *mmio); -static int mv6_reset_hc(struct mv_host_priv *hpriv, void __iomem *mmio, +static int mv6_reset_hc(struct ata_host *host, void __iomem *mmio, unsigned int n_hc); static void mv6_reset_flash(struct mv_host_priv *hpriv, void __iomem *mmio); static void mv_soc_enable_leds(struct mv_host_priv *hpriv, void __iomem *mmio); static void mv_soc_read_preamp(struct mv_host_priv *hpriv, int idx, void __iomem *mmio); -static int mv_soc_reset_hc(struct mv_host_priv *hpriv, +static int mv_soc_reset_hc(struct ata_host *host, void __iomem *mmio, unsigned int n_hc); static void mv_soc_reset_flash(struct mv_host_priv *hpriv, void __iomem *mmio); @@ -3194,9 +3194,10 @@ static void mv5_reset_one_hc(struct mv_host_priv *hpriv, void __iomem *mmio, } #undef ZERO -static int mv5_reset_hc(struct mv_host_priv *hpriv, void __iomem *mmio, +static int mv5_reset_hc(struct ata_host *host, void __iomem *mmio, unsigned int n_hc) { + struct mv_host_priv *hpriv = host->private_data; unsigned int hc, port; for (hc = 0; hc < n_hc; hc++) { @@ -3255,7 +3256,7 @@ static void mv6_reset_flash(struct mv_host_priv *hpriv, void __iomem *mmio) * LOCKING: * Inherited from caller. */ -static int mv6_reset_hc(struct mv_host_priv *hpriv, void __iomem *mmio, +static int mv6_reset_hc(struct ata_host *host, void __iomem *mmio, unsigned int n_hc) { void __iomem *reg = mmio + PCI_MAIN_CMD_STS; @@ -3275,7 +3276,7 @@ static int mv6_reset_hc(struct mv_host_priv *hpriv, void __iomem *mmio, break; } if (!(PCI_MASTER_EMPTY & t)) { - printk(KERN_ERR DRV_NAME ": PCI master won't flush\n"); + dev_err(host->dev, "PCI master won't flush\n"); rc = 1; goto done; } @@ -3289,7 +3290,7 @@ static int mv6_reset_hc(struct mv_host_priv *hpriv, void __iomem *mmio, } while (!(GLOB_SFT_RST & t) && (i-- > 0)); if (!(GLOB_SFT_RST & t)) { - printk(KERN_ERR DRV_NAME ": can't set global reset\n"); + dev_err(host->dev, "can't set global reset\n"); rc = 1; goto done; } @@ -3303,7 +3304,7 @@ static int mv6_reset_hc(struct mv_host_priv *hpriv, void __iomem *mmio, } while ((GLOB_SFT_RST & t) && (i-- > 0)); if (GLOB_SFT_RST & t) { - printk(KERN_ERR DRV_NAME ": can't clear global reset\n"); + dev_err(host->dev, "can't clear global reset\n"); rc = 1; } done: @@ -3472,9 +3473,10 @@ static void mv_soc_reset_one_hc(struct mv_host_priv *hpriv, #undef ZERO -static int mv_soc_reset_hc(struct mv_host_priv *hpriv, +static int mv_soc_reset_hc(struct ata_host *host, void __iomem *mmio, unsigned int n_hc) { + struct mv_host_priv *hpriv = host->private_data; unsigned int port; for (port = 0; port < hpriv->n_ports; port++) @@ -3847,11 +3849,11 @@ static int mv_chip_id(struct ata_host *host, unsigned int board_idx) * * Warn the user, lest they think we're just buggy. */ - printk(KERN_WARNING DRV_NAME ": Highpoint RocketRAID" + dev_warn(&pdev->dev, "Highpoint RocketRAID" " BIOS CORRUPTS DATA on all attached drives," " regardless of if/how they are configured." " BEWARE!\n"); - printk(KERN_WARNING DRV_NAME ": For data safety, do not" + dev_warn(&pdev->dev, "For data safety, do not" " use sectors 8-9 on \"Legacy\" drives," " and avoid the final two gigabytes on" " all RocketRAID BIOS initialized drives.\n"); @@ -3942,7 +3944,7 @@ static int mv_init_host(struct ata_host *host) if (hpriv->ops->read_preamp) hpriv->ops->read_preamp(hpriv, port, mmio); - rc = hpriv->ops->reset_hc(hpriv, mmio, n_hc); + rc = hpriv->ops->reset_hc(host, mmio, n_hc); if (rc) goto done; @@ -4258,7 +4260,7 @@ static int mv_platform_resume(struct platform_device *pdev) /* initialize adapter */ ret = mv_init_host(host); if (ret) { - printk(KERN_ERR DRV_NAME ": Error during HW init\n"); + dev_err(&pdev->dev, "Error during HW init\n"); return ret; } ata_host_resume(host); From f06c13aa01a9855e816fda296e3eda2e656b4c53 Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Tue, 21 Dec 2021 08:21:23 +0100 Subject: [PATCH 283/493] ata: pata_hpt37x: convert pr_XXX() calls Convert pr_XXX() calls to structured logging. Signed-off-by: Hannes Reinecke Signed-off-by: Damien Le Moal --- drivers/ata/pata_hpt37x.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/drivers/ata/pata_hpt37x.c b/drivers/ata/pata_hpt37x.c index f242157bc81b..7abc7e04f656 100644 --- a/drivers/ata/pata_hpt37x.c +++ b/drivers/ata/pata_hpt37x.c @@ -14,9 +14,6 @@ * TODO * Look into engine reset on timeout errors. Should not be required. */ - -#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt - #include #include #include @@ -231,7 +228,8 @@ static int hpt_dma_blacklisted(const struct ata_device *dev, char *modestr, i = match_string(list, -1, model_num); if (i >= 0) { - pr_warn("%s is not supported for %s\n", modestr, list[i]); + ata_dev_warn(dev, "%s is not supported for %s\n", + modestr, list[i]); return 1; } return 0; @@ -864,7 +862,8 @@ static int hpt37x_init_one(struct pci_dev *dev, const struct pci_device_id *id) chip_table = &hpt372; break; default: - pr_err("Unknown HPT366 subtype, please report (%d)\n", + dev_err(&dev->dev, + "Unknown HPT366 subtype, please report (%d)\n", rev); return -ENODEV; } @@ -905,7 +904,8 @@ static int hpt37x_init_one(struct pci_dev *dev, const struct pci_device_id *id) *ppi = &info_hpt374_fn1; break; default: - pr_err("PCI table is bogus, please report (%d)\n", dev->device); + dev_err(&dev->dev, "PCI table is bogus, please report (%d)\n", + dev->device); return -ENODEV; } /* Ok so this is a chip we support */ @@ -953,7 +953,7 @@ static int hpt37x_init_one(struct pci_dev *dev, const struct pci_device_id *id) u8 sr; u32 total = 0; - pr_warn("BIOS has not set timing clocks\n"); + dev_warn(&dev->dev, "BIOS has not set timing clocks\n"); /* This is the process the HPT371 BIOS is reported to use */ for (i = 0; i < 128; i++) { @@ -1009,7 +1009,7 @@ static int hpt37x_init_one(struct pci_dev *dev, const struct pci_device_id *id) (f_high << 16) | f_low | 0x100); } if (adjust == 8) { - pr_err("DPLL did not stabilize!\n"); + dev_err(&dev->dev, "DPLL did not stabilize!\n"); return -ENODEV; } if (dpll == 3) @@ -1017,7 +1017,7 @@ static int hpt37x_init_one(struct pci_dev *dev, const struct pci_device_id *id) else private_data = (void *)hpt37x_timings_50; - pr_info("bus clock %dMHz, using %dMHz DPLL\n", + dev_info(&dev->dev, "bus clock %dMHz, using %dMHz DPLL\n", MHz[clock_slot], MHz[dpll]); } else { private_data = (void *)chip_table->clocks[clock_slot]; @@ -1032,7 +1032,7 @@ static int hpt37x_init_one(struct pci_dev *dev, const struct pci_device_id *id) if (clock_slot < 2 && ppi[0] == &info_hpt370a) ppi[0] = &info_hpt370a_33; - pr_info("%s using %dMHz bus clock\n", + dev_info(&dev->dev, "%s using %dMHz bus clock\n", chip_table->name, MHz[clock_slot]); } From cb3f48fc57508aea8698e0bee99068fddde30ad9 Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Tue, 21 Dec 2021 08:21:24 +0100 Subject: [PATCH 284/493] ata: pata_octeon_cf: Replace pr_XXX() calls with structured logging Signed-off-by: Hannes Reinecke Signed-off-by: Damien Le Moal --- drivers/ata/pata_octeon_cf.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/ata/pata_octeon_cf.c b/drivers/ata/pata_octeon_cf.c index a2e7dcaa87ac..df62e22b49a6 100644 --- a/drivers/ata/pata_octeon_cf.c +++ b/drivers/ata/pata_octeon_cf.c @@ -273,9 +273,9 @@ static void octeon_cf_set_dmamode(struct ata_port *ap, struct ata_device *dev) dma_tim.s.we_n = ns_to_tim_reg(tim_mult, oe_n); dma_tim.s.we_a = ns_to_tim_reg(tim_mult, oe_a); - pr_debug("ns to ticks (mult %d) of %d is: %d\n", tim_mult, 60, + ata_dev_dbg(dev, "ns to ticks (mult %d) of %d is: %d\n", tim_mult, 60, ns_to_tim_reg(tim_mult, 60)); - pr_debug("oe_n: %d, oe_a: %d, dmack_s: %d, dmack_h: %d, dmarq: %d, pause: %d\n", + ata_dev_dbg(dev, "oe_n: %d, oe_a: %d, dmack_s: %d, dmack_h: %d, dmarq: %d, pause: %d\n", dma_tim.s.oe_n, dma_tim.s.oe_a, dma_tim.s.dmack_s, dma_tim.s.dmack_h, dma_tim.s.dmarq, dma_tim.s.pause); From cb8d5daae9adcc5dac44c068d5d795056aa6d30c Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Tue, 21 Dec 2021 08:21:25 +0100 Subject: [PATCH 285/493] ata: pata_hpt3x2n: convert pr_XXX() calls Convert pr_XXX() calls to structured logging. Signed-off-by: Hannes Reinecke Signed-off-by: Damien Le Moal --- drivers/ata/pata_hpt3x2n.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/drivers/ata/pata_hpt3x2n.c b/drivers/ata/pata_hpt3x2n.c index 48eef338e050..1d9d4eec5b8a 100644 --- a/drivers/ata/pata_hpt3x2n.c +++ b/drivers/ata/pata_hpt3x2n.c @@ -15,9 +15,6 @@ * TODO * Work out best PLL policy */ - -#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt - #include #include #include @@ -420,7 +417,7 @@ static int hpt3x2n_pci_clock(struct pci_dev *pdev) u16 sr; u32 total = 0; - pr_warn("BIOS clock data not set\n"); + dev_warn(&pdev->dev, "BIOS clock data not set\n"); /* This is the process the HPT371 BIOS is reported to use */ for (i = 0; i < 128; i++) { @@ -530,7 +527,8 @@ hpt372n: ppi[0] = &info_hpt372n; break; default: - pr_err("PCI table is bogus, please report (%d)\n", dev->device); + dev_err(&dev->dev,"PCI table is bogus, please report (%d)\n", + dev->device); return -ENODEV; } @@ -579,11 +577,11 @@ hpt372n: pci_write_config_dword(dev, 0x5C, (f_high << 16) | f_low); } if (adjust == 8) { - pr_err("DPLL did not stabilize!\n"); + dev_err(&dev->dev, "DPLL did not stabilize!\n"); return -ENODEV; } - pr_info("bus clock %dMHz, using 66MHz DPLL\n", pci_mhz); + dev_info(&dev->dev, "bus clock %dMHz, using 66MHz DPLL\n", pci_mhz); /* * Set our private data up. We only need a few flags From 97b7925a5cb44dae4b9f0c8f1b22427521b1de8d Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Tue, 21 Dec 2021 08:21:26 +0100 Subject: [PATCH 286/493] ata: sata_gemini: convert pr_err() calls Convert pr_err() calls to dev_err() Signed-off-by: Hannes Reinecke Signed-off-by: Damien Le Moal --- drivers/ata/sata_gemini.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/ata/sata_gemini.c b/drivers/ata/sata_gemini.c index f793564f3d78..440a63de20d0 100644 --- a/drivers/ata/sata_gemini.c +++ b/drivers/ata/sata_gemini.c @@ -253,12 +253,12 @@ static int gemini_sata_bridge_init(struct sata_gemini *sg) ret = clk_prepare_enable(sg->sata0_pclk); if (ret) { - pr_err("failed to enable SATA0 PCLK\n"); + dev_err(dev, "failed to enable SATA0 PCLK\n"); return ret; } ret = clk_prepare_enable(sg->sata1_pclk); if (ret) { - pr_err("failed to enable SATA1 PCLK\n"); + dev_err(dev, "failed to enable SATA1 PCLK\n"); clk_disable_unprepare(sg->sata0_pclk); return ret; } From cbc59b8c20863cca43b8b9552cf409a2c8d1be7a Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Tue, 21 Dec 2021 08:21:27 +0100 Subject: [PATCH 287/493] ata: pata_hpt366: convert pr_warn() calls Convert pr_warn() calls to ata_dev_warn() Signed-off-by: Hannes Reinecke Signed-off-by: Damien Le Moal --- drivers/ata/pata_hpt366.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/ata/pata_hpt366.c b/drivers/ata/pata_hpt366.c index 06b7c4a9ec95..778c893f276b 100644 --- a/drivers/ata/pata_hpt366.c +++ b/drivers/ata/pata_hpt366.c @@ -14,9 +14,6 @@ * TODO * Look into engine reset on timeout errors. Should not be required. */ - -#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt - #include #include #include @@ -183,7 +180,7 @@ static int hpt_dma_blacklisted(const struct ata_device *dev, char *modestr, i = match_string(list, -1, model_num); if (i >= 0) { - pr_warn("%s is not supported for %s\n", modestr, list[i]); + ata_dev_warn(dev, "%s is not supported for %s\n", modestr, list[i]); return 1; } return 0; From 41d4c60f8623d8a42f649376f678e27d802b8163 Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Tue, 21 Dec 2021 08:21:28 +0100 Subject: [PATCH 288/493] ata: libata-scsi: rework ata_dump_status to avoid using pr_cont() pr_cont() has the problem that individual calls will be disrupted under high load, causing each call to end up on a single line and thereby mangling the output. So rework ata_dump_status() to have just one call to ata_port_warn() and avoid this problem. Signed-off-by: Hannes Reinecke Signed-off-by: Damien Le Moal --- drivers/ata/libata-scsi.c | 51 ++++++++++++++++++--------------------- 1 file changed, 23 insertions(+), 28 deletions(-) diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c index 11fb046e3035..a16ef0030667 100644 --- a/drivers/ata/libata-scsi.c +++ b/drivers/ata/libata-scsi.c @@ -668,7 +668,7 @@ static void ata_qc_set_pc_nbytes(struct ata_queued_cmd *qc) /** * ata_dump_status - user friendly display of error info - * @id: id of the port in question + * @ap: the port in question * @tf: ptr to filled out taskfile * * Decode and dump the ATA error/status registers for the user so @@ -678,37 +678,32 @@ static void ata_qc_set_pc_nbytes(struct ata_queued_cmd *qc) * LOCKING: * inherited from caller */ -static void ata_dump_status(unsigned id, struct ata_taskfile *tf) +static void ata_dump_status(struct ata_port *ap, struct ata_taskfile *tf) { u8 stat = tf->command, err = tf->feature; - pr_warn("ata%u: status=0x%02x { ", id, stat); if (stat & ATA_BUSY) { - pr_cont("Busy }\n"); /* Data is not valid in this case */ + ata_port_warn(ap, "status=0x%02x {Busy} ", stat); } else { - if (stat & ATA_DRDY) pr_cont("DriveReady "); - if (stat & ATA_DF) pr_cont("DeviceFault "); - if (stat & ATA_DSC) pr_cont("SeekComplete "); - if (stat & ATA_DRQ) pr_cont("DataRequest "); - if (stat & ATA_CORR) pr_cont("CorrectedError "); - if (stat & ATA_SENSE) pr_cont("Sense "); - if (stat & ATA_ERR) pr_cont("Error "); - pr_cont("}\n"); - - if (err) { - pr_warn("ata%u: error=0x%02x { ", id, err); - if (err & ATA_ABORTED) pr_cont("DriveStatusError "); - if (err & ATA_ICRC) { - if (err & ATA_ABORTED) - pr_cont("BadCRC "); - else pr_cont("Sector "); - } - if (err & ATA_UNC) pr_cont("UncorrectableError "); - if (err & ATA_IDNF) pr_cont("SectorIdNotFound "); - if (err & ATA_TRK0NF) pr_cont("TrackZeroNotFound "); - if (err & ATA_AMNF) pr_cont("AddrMarkNotFound "); - pr_cont("}\n"); - } + ata_port_warn(ap, "status=0x%02x { %s%s%s%s%s%s%s} ", stat, + stat & ATA_DRDY ? "DriveReady " : "", + stat & ATA_DF ? "DeviceFault " : "", + stat & ATA_DSC ? "SeekComplete " : "", + stat & ATA_DRQ ? "DataRequest " : "", + stat & ATA_CORR ? "CorrectedError " : "", + stat & ATA_SENSE ? "Sense " : "", + stat & ATA_ERR ? "Error " : ""); + if (err) + ata_port_warn(ap, "error=0x%02x {%s%s%s%s%s%s", err, + err & ATA_ABORTED ? + "DriveStatusError " : "", + err & ATA_ICRC ? + (err & ATA_ABORTED ? + "BadCRC " : "Sector ") : "", + err & ATA_UNC ? "UncorrectableError " : "", + err & ATA_IDNF ? "SectorIdNotFound " : "", + err & ATA_TRK0NF ? "TrackZeroNotFound " : "", + err & ATA_AMNF ? "AddrMarkNotFound " : ""); } } @@ -1662,7 +1657,7 @@ static void ata_scsi_qc_complete(struct ata_queued_cmd *qc) cmd->result = SAM_STAT_GOOD; if (need_sense && !ap->ops->error_handler) - ata_dump_status(ap->print_id, &qc->result_tf); + ata_dump_status(ap, &qc->result_tf); ata_qc_done(qc); } From 898a276d4304263e83edffa0bc1792aa8116cc90 Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Tue, 21 Dec 2021 08:21:29 +0100 Subject: [PATCH 289/493] ata: sata_dwc_460ex: drop DEBUG_NCQ Obsolete, and has been converted to tracepoints. Signed-off-by: Hannes Reinecke Signed-off-by: Damien Le Moal --- drivers/ata/sata_dwc_460ex.c | 28 ---------------------------- 1 file changed, 28 deletions(-) diff --git a/drivers/ata/sata_dwc_460ex.c b/drivers/ata/sata_dwc_460ex.c index c33dc98e0d9d..448d88cf1b38 100644 --- a/drivers/ata/sata_dwc_460ex.c +++ b/drivers/ata/sata_dwc_460ex.c @@ -20,7 +20,6 @@ #ifdef CONFIG_SATA_DWC_VDEBUG #define VERBOSE_DEBUG -#define DEBUG_NCQ #endif #include @@ -296,22 +295,6 @@ static const char *get_prot_descript(u8 protocol) } } -#ifdef DEBUG_NCQ -static const char *get_dma_dir_descript(int dma_dir) -{ - switch ((enum dma_data_direction)dma_dir) { - case DMA_BIDIRECTIONAL: - return "bidirectional"; - case DMA_TO_DEVICE: - return "to device"; - case DMA_FROM_DEVICE: - return "from device"; - default: - return "none"; - } -} -#endif - static void dma_dwc_xfer_done(void *hsdev_instance) { unsigned long flags; @@ -750,17 +733,6 @@ static void sata_dwc_dma_xfer_complete(struct ata_port *ap, u32 check_status) return; } -#ifdef DEBUG_NCQ - if (tag > 0) { - dev_info(ap->dev, - "%s tag=%u cmd=0x%02x dma dir=%s proto=%s dmacr=0x%08x\n", - __func__, qc->hw_tag, qc->tf.command, - get_dma_dir_descript(qc->dma_dir), - get_prot_descript(qc->tf.protocol), - sata_dwc_readl(&hsdev->sata_dwc_regs->dmacr)); - } -#endif - if (ata_is_dma(qc->tf.protocol)) { if (hsdevp->dma_pending[tag] == SATA_DWC_DMA_PENDING_NONE) { dev_err(ap->dev, From d4caa9054e4f9405e8d3d93a5891fe20256257f2 Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Tue, 21 Dec 2021 08:21:30 +0100 Subject: [PATCH 290/493] ata: sata_dwc_460ex: remove 'check_status' argument Remove the 'check_status' argument from sata_dwc_qc_complete() and sata_dwc_dma_xfer_complete() as it has no functionality. Signed-off-by: Hannes Reinecke Signed-off-by: Damien Le Moal --- drivers/ata/sata_dwc_460ex.c | 29 +++++++++++++---------------- 1 file changed, 13 insertions(+), 16 deletions(-) diff --git a/drivers/ata/sata_dwc_460ex.c b/drivers/ata/sata_dwc_460ex.c index 448d88cf1b38..319998dcbe58 100644 --- a/drivers/ata/sata_dwc_460ex.c +++ b/drivers/ata/sata_dwc_460ex.c @@ -182,9 +182,8 @@ enum { * Prototypes */ static void sata_dwc_bmdma_start_by_tag(struct ata_queued_cmd *qc, u8 tag); -static int sata_dwc_qc_complete(struct ata_port *ap, struct ata_queued_cmd *qc, - u32 check_status); -static void sata_dwc_dma_xfer_complete(struct ata_port *ap, u32 check_status); +static int sata_dwc_qc_complete(struct ata_port *ap, struct ata_queued_cmd *qc); +static void sata_dwc_dma_xfer_complete(struct ata_port *ap); static void sata_dwc_clear_dmacr(struct sata_dwc_device_port *hsdevp, u8 tag); #ifdef CONFIG_SATA_DWC_OLD_DMA @@ -324,7 +323,7 @@ static void dma_dwc_xfer_done(void *hsdev_instance) } if ((hsdevp->dma_interrupt_count % 2) == 0) - sata_dwc_dma_xfer_complete(ap, 1); + sata_dwc_dma_xfer_complete(ap); spin_unlock_irqrestore(&host->lock, flags); } @@ -556,7 +555,7 @@ static irqreturn_t sata_dwc_isr(int irq, void *dev_instance) if (status & ATA_ERR) { dev_dbg(ap->dev, "interrupt ATA_ERR (0x%x)\n", status); - sata_dwc_qc_complete(ap, qc, 1); + sata_dwc_qc_complete(ap, qc); handled = 1; goto DONE; } @@ -581,13 +580,13 @@ DRVSTILLBUSY: } if ((hsdevp->dma_interrupt_count % 2) == 0) - sata_dwc_dma_xfer_complete(ap, 1); + sata_dwc_dma_xfer_complete(ap); } else if (ata_is_pio(qc->tf.protocol)) { ata_sff_hsm_move(ap, qc, status, 0); handled = 1; goto DONE; } else { - if (unlikely(sata_dwc_qc_complete(ap, qc, 1))) + if (unlikely(sata_dwc_qc_complete(ap, qc))) goto DRVSTILLBUSY; } @@ -647,7 +646,7 @@ DRVSTILLBUSY: if (status & ATA_ERR) { dev_dbg(ap->dev, "%s ATA_ERR (0x%x)\n", __func__, status); - sata_dwc_qc_complete(ap, qc, 1); + sata_dwc_qc_complete(ap, qc); handled = 1; goto DONE; } @@ -662,9 +661,9 @@ DRVSTILLBUSY: dev_warn(ap->dev, "%s: DMA not pending?\n", __func__); if ((hsdevp->dma_interrupt_count % 2) == 0) - sata_dwc_dma_xfer_complete(ap, 1); + sata_dwc_dma_xfer_complete(ap); } else { - if (unlikely(sata_dwc_qc_complete(ap, qc, 1))) + if (unlikely(sata_dwc_qc_complete(ap, qc))) goto STILLBUSY; } continue; @@ -719,7 +718,7 @@ static void sata_dwc_clear_dmacr(struct sata_dwc_device_port *hsdevp, u8 tag) } } -static void sata_dwc_dma_xfer_complete(struct ata_port *ap, u32 check_status) +static void sata_dwc_dma_xfer_complete(struct ata_port *ap) { struct ata_queued_cmd *qc; struct sata_dwc_device_port *hsdevp = HSDEVP_FROM_AP(ap); @@ -742,15 +741,14 @@ static void sata_dwc_dma_xfer_complete(struct ata_port *ap, u32 check_status) } hsdevp->dma_pending[tag] = SATA_DWC_DMA_PENDING_NONE; - sata_dwc_qc_complete(ap, qc, check_status); + sata_dwc_qc_complete(ap, qc); ap->link.active_tag = ATA_TAG_POISON; } else { - sata_dwc_qc_complete(ap, qc, check_status); + sata_dwc_qc_complete(ap, qc); } } -static int sata_dwc_qc_complete(struct ata_port *ap, struct ata_queued_cmd *qc, - u32 check_status) +static int sata_dwc_qc_complete(struct ata_port *ap, struct ata_queued_cmd *qc) { u8 status = 0; u32 mask = 0x0; @@ -758,7 +756,6 @@ static int sata_dwc_qc_complete(struct ata_port *ap, struct ata_queued_cmd *qc, struct sata_dwc_device *hsdev = HSDEV_FROM_AP(ap); struct sata_dwc_device_port *hsdevp = HSDEVP_FROM_AP(ap); hsdev->sactive_queued = 0; - dev_dbg(ap->dev, "%s checkstatus? %x\n", __func__, check_status); if (hsdevp->dma_pending[tag] == SATA_DWC_DMA_PENDING_TX) dev_err(ap->dev, "TX DMA PENDING\n"); From 1d009eb6fefb64fb8db1cf9ee179133fc7270f2f Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Tue, 21 Dec 2021 08:21:31 +0100 Subject: [PATCH 291/493] ata: sata_dwc_460ex: Remove debug compile options Driver has been converted to dynamic debugging, so the compile-time options don't have any functionality left. Signed-off-by: Hannes Reinecke Signed-off-by: Damien Le Moal --- drivers/ata/Kconfig | 12 ------------ drivers/ata/sata_dwc_460ex.c | 8 -------- 2 files changed, 20 deletions(-) diff --git a/drivers/ata/Kconfig b/drivers/ata/Kconfig index a7da8ea7b3ed..f6e943c74001 100644 --- a/drivers/ata/Kconfig +++ b/drivers/ata/Kconfig @@ -432,18 +432,6 @@ config SATA_DWC_OLD_DMA This option enables support for old device trees without the "dmas" property. -config SATA_DWC_DEBUG - bool "Debugging driver version" - depends on SATA_DWC - help - This option enables debugging output in the driver. - -config SATA_DWC_VDEBUG - bool "Verbose debug output" - depends on SATA_DWC_DEBUG - help - This option enables the taskfile dumping and NCQ debugging. - config SATA_HIGHBANK tristate "Calxeda Highbank SATA support" depends on ARCH_HIGHBANK || COMPILE_TEST diff --git a/drivers/ata/sata_dwc_460ex.c b/drivers/ata/sata_dwc_460ex.c index 319998dcbe58..bec33d781ae0 100644 --- a/drivers/ata/sata_dwc_460ex.c +++ b/drivers/ata/sata_dwc_460ex.c @@ -14,14 +14,6 @@ * COPYRIGHT (C) 2005 SYNOPSYS, INC. ALL RIGHTS RESERVED */ -#ifdef CONFIG_SATA_DWC_DEBUG -#define DEBUG -#endif - -#ifdef CONFIG_SATA_DWC_VDEBUG -#define VERBOSE_DEBUG -#endif - #include #include #include From 87924c5b4094f195507bebcab96e141e48c947d7 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Tue, 4 Jan 2022 13:46:18 +0900 Subject: [PATCH 292/493] ata: sata_fsl: add compile test support Add dependendy on COMPILE_TEST to allow compile tests with configs that do not enable FSL_SOC. Signed-off-by: Damien Le Moal Reviewed-by: Hannes Reinecke --- drivers/ata/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/ata/Kconfig b/drivers/ata/Kconfig index f6e943c74001..af6bf1b8902a 100644 --- a/drivers/ata/Kconfig +++ b/drivers/ata/Kconfig @@ -273,7 +273,7 @@ config AHCI_QORIQ config SATA_FSL tristate "Freescale 3.0Gbps SATA support" - depends on FSL_SOC + depends on FSL_SOC || COMPILE_TEST select SATA_HOST help This option enables support for Freescale 3.0Gbps SATA controller. From 641ba1a5e2f88039b0d62524b2eb668680c94ea9 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Tue, 4 Jan 2022 14:48:17 +0900 Subject: [PATCH 293/493] ata: ahci_brcm: add compile test support Add Kconfig dependendy on COMPILE_TEST to allow compile tests with configs that do not enable ARCH_BRCMSTB, BMIPS_GENERIC or ARCH_BCM_XXX. Signed-off-by: Damien Le Moal Reviewed-by: Hannes Reinecke --- drivers/ata/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/ata/Kconfig b/drivers/ata/Kconfig index af6bf1b8902a..2ea0dcba45c7 100644 --- a/drivers/ata/Kconfig +++ b/drivers/ata/Kconfig @@ -146,7 +146,7 @@ config SATA_AHCI_PLATFORM config AHCI_BRCM tristate "Broadcom AHCI SATA support" depends on ARCH_BRCMSTB || BMIPS_GENERIC || ARCH_BCM_NSP || \ - ARCH_BCM_63XX + ARCH_BCM_63XX || COMPILE_TEST select SATA_HOST help This option enables support for the AHCI SATA3 controller found on From e73d737894dc4a59f232e1a1b16d968569fa0ffd Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Tue, 4 Jan 2022 14:51:43 +0900 Subject: [PATCH 294/493] ata: ahci_da850: add compile test support Add Kconfig dependendy on COMPILE_TEST to allow compile tests with configs that do not enable ARCH_DAVINCI_DA850. Signed-off-by: Damien Le Moal Reviewed-by: Hannes Reinecke --- drivers/ata/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/ata/Kconfig b/drivers/ata/Kconfig index 2ea0dcba45c7..b100565762e9 100644 --- a/drivers/ata/Kconfig +++ b/drivers/ata/Kconfig @@ -156,7 +156,7 @@ config AHCI_BRCM config AHCI_DA850 tristate "DaVinci DA850 AHCI SATA support" - depends on ARCH_DAVINCI_DA850 + depends on ARCH_DAVINCI_DA850 || COMPILE_TEST select SATA_HOST help This option enables support for the DaVinci DA850 SoC's From 56e18702b0c240dc3c4fde0619e8a78f5f13be97 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Tue, 4 Jan 2022 14:53:49 +0900 Subject: [PATCH 295/493] ata: ahci_dm816: add compile test support Add Kconfig dependendy on COMPILE_TEST to allow compile tests with configs that do not enable ARCH_OMAP2PLUS. Signed-off-by: Damien Le Moal Reviewed-by: Hannes Reinecke --- drivers/ata/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/ata/Kconfig b/drivers/ata/Kconfig index b100565762e9..47b7b69b88b1 100644 --- a/drivers/ata/Kconfig +++ b/drivers/ata/Kconfig @@ -166,7 +166,7 @@ config AHCI_DA850 config AHCI_DM816 tristate "DaVinci DM816 AHCI SATA support" - depends on ARCH_OMAP2PLUS + depends on ARCH_OMAP2PLUS || COMPILE_TEST select SATA_HOST help This option enables support for the DaVinci DM816 SoC's From 07f910f9b7295b6a28b337fedb56e612684c5659 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Mon, 4 Oct 2021 14:46:50 +0100 Subject: [PATCH 296/493] mm: Remove slab from struct page All members of struct slab can now be removed from struct page. This shrinks the definition of struct page by 30 LOC, making it easier to understand. Signed-off-by: Matthew Wilcox (Oracle) Signed-off-by: Vlastimil Babka --- include/linux/mm_types.h | 28 ---------------------------- include/linux/page-flags.h | 37 ------------------------------------- mm/slab.h | 6 ------ 3 files changed, 71 deletions(-) diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 1ae3537c7920..646f3ed4f6df 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -118,31 +118,6 @@ struct page { atomic_long_t pp_frag_count; }; }; - struct { /* slab, slob and slub */ - union { - struct list_head slab_list; - struct { /* Partial pages */ - struct page *next; -#ifdef CONFIG_64BIT - int pages; /* Nr of pages left */ -#else - short int pages; -#endif - }; - }; - struct kmem_cache *slab_cache; /* not slob */ - /* Double-word boundary */ - void *freelist; /* first free object */ - union { - void *s_mem; /* slab: first object */ - unsigned long counters; /* SLUB */ - struct { /* SLUB */ - unsigned inuse:16; - unsigned objects:15; - unsigned frozen:1; - }; - }; - }; struct { /* Tail pages of compound page */ unsigned long compound_head; /* Bit zero is set */ @@ -206,9 +181,6 @@ struct page { * which are currently stored here. */ unsigned int page_type; - - unsigned int active; /* SLAB */ - int units; /* SLOB */ }; /* Usage count. *DO NOT USE DIRECTLY*. See page_ref.h */ diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index b5f14d581113..1b08e33265fa 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -909,43 +909,6 @@ extern bool is_free_buddy_page(struct page *page); __PAGEFLAG(Isolated, isolated, PF_ANY); -/* - * If network-based swap is enabled, sl*b must keep track of whether pages - * were allocated from pfmemalloc reserves. - */ -static inline int PageSlabPfmemalloc(struct page *page) -{ - VM_BUG_ON_PAGE(!PageSlab(page), page); - return PageActive(page); -} - -/* - * A version of PageSlabPfmemalloc() for opportunistic checks where the page - * might have been freed under us and not be a PageSlab anymore. - */ -static inline int __PageSlabPfmemalloc(struct page *page) -{ - return PageActive(page); -} - -static inline void SetPageSlabPfmemalloc(struct page *page) -{ - VM_BUG_ON_PAGE(!PageSlab(page), page); - SetPageActive(page); -} - -static inline void __ClearPageSlabPfmemalloc(struct page *page) -{ - VM_BUG_ON_PAGE(!PageSlab(page), page); - __ClearPageActive(page); -} - -static inline void ClearPageSlabPfmemalloc(struct page *page) -{ - VM_BUG_ON_PAGE(!PageSlab(page), page); - ClearPageActive(page); -} - #ifdef CONFIG_MMU #define __PG_MLOCKED (1UL << PG_mlocked) #else diff --git a/mm/slab.h b/mm/slab.h index 95b9a74a2d51..207658b200ef 100644 --- a/mm/slab.h +++ b/mm/slab.h @@ -67,14 +67,8 @@ struct slab { static_assert(offsetof(struct page, pg) == offsetof(struct slab, sl)) SLAB_MATCH(flags, __page_flags); SLAB_MATCH(compound_head, slab_list); /* Ensure bit 0 is clear */ -SLAB_MATCH(slab_list, slab_list); #ifndef CONFIG_SLOB SLAB_MATCH(rcu_head, rcu_head); -SLAB_MATCH(slab_cache, slab_cache); -#endif -#ifdef CONFIG_SLAB -SLAB_MATCH(s_mem, s_mem); -SLAB_MATCH(active, active); #endif SLAB_MATCH(_refcount, __page_refcount); #ifdef CONFIG_MEMCG From 31834aaa4e2a26d8d1f6b36703bb35cfdb8fc98c Mon Sep 17 00:00:00 2001 From: Yang Yingliang Date: Thu, 6 Jan 2022 15:54:48 +0800 Subject: [PATCH 297/493] ACPI: pfr_update: Fix return value check in pfru_write() In case of error, memremap() returns NULL pointer not ERR_PTR(). The IS_ERR() test in the return value check should be replaced with NULL test. Fixes: 0db89fa243e5 ("ACPI: Introduce Platform Firmware Runtime Update device driver") Reported-by: Hulk Robot Signed-off-by: Yang Yingliang Acked-by: Chen Yu Signed-off-by: Rafael J. Wysocki --- drivers/acpi/pfr_update.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/acpi/pfr_update.c b/drivers/acpi/pfr_update.c index 149b5b2530b9..6bb0b778b5da 100644 --- a/drivers/acpi/pfr_update.c +++ b/drivers/acpi/pfr_update.c @@ -460,8 +460,8 @@ static ssize_t pfru_write(struct file *file, const char __user *buf, /* map the communication buffer */ phy_addr = (phys_addr_t)((buf_info.addr_hi << 32) | buf_info.addr_lo); buf_ptr = memremap(phy_addr, buf_info.buf_size, MEMREMAP_WB); - if (IS_ERR(buf_ptr)) - return PTR_ERR(buf_ptr); + if (!buf_ptr) + return -ENOMEM; if (!copy_from_iter_full(buf_ptr, len, &iter)) { ret = -EINVAL; From 080dc5e5656c1cc1cdefb501b9b645a07519f763 Mon Sep 17 00:00:00 2001 From: Shyam Prasad N Date: Mon, 19 Jul 2021 17:05:53 +0000 Subject: [PATCH 298/493] cifs: take cifs_tcp_ses_lock for status checks While checking/updating status for tcp ses, smb ses or tcon, we take GlobalMid_Lock. This doesn't make any sense. Replaced it with cifs_tcp_ses_lock. Ideally, we should take a spin lock per struct. But since tcp ses, smb ses and tcon objects won't add up to a lot, I think there should not be too much contention. Also, in few other places, these are checked without locking. Added locking for these. Signed-off-by: Shyam Prasad N Signed-off-by: Steve French --- fs/cifs/cifs_swn.c | 4 +-- fs/cifs/cifsencrypt.c | 6 ++++- fs/cifs/cifsglob.h | 4 +-- fs/cifs/cifssmb.c | 12 ++++++++- fs/cifs/connect.c | 36 +++++++++++++++++++-------- fs/cifs/netmisc.c | 4 +-- fs/cifs/sess.c | 8 +++--- fs/cifs/smb1ops.c | 11 +++++++-- fs/cifs/smb2ops.c | 15 +++++++++-- fs/cifs/smb2pdu.c | 21 +++++++++++++--- fs/cifs/smb2transport.c | 31 ++++++++++++++++++----- fs/cifs/transport.c | 55 +++++++++++++++++++++++++++++++++++------ 12 files changed, 164 insertions(+), 43 deletions(-) diff --git a/fs/cifs/cifs_swn.c b/fs/cifs/cifs_swn.c index 23a1ed2fb769..8f386dd9939e 100644 --- a/fs/cifs/cifs_swn.c +++ b/fs/cifs/cifs_swn.c @@ -498,10 +498,10 @@ static int cifs_swn_reconnect(struct cifs_tcon *tcon, struct sockaddr_storage *a goto unlock; } - spin_lock(&GlobalMid_Lock); + spin_lock(&cifs_tcp_ses_lock); if (tcon->ses->server->tcpStatus != CifsExiting) tcon->ses->server->tcpStatus = CifsNeedReconnect; - spin_unlock(&GlobalMid_Lock); + spin_unlock(&cifs_tcp_ses_lock); unlock: mutex_unlock(&tcon->ses->server->srv_mutex); diff --git a/fs/cifs/cifsencrypt.c b/fs/cifs/cifsencrypt.c index d118282071b3..0912d8bbbac1 100644 --- a/fs/cifs/cifsencrypt.c +++ b/fs/cifs/cifsencrypt.c @@ -141,9 +141,13 @@ int cifs_sign_rqst(struct smb_rqst *rqst, struct TCP_Server_Info *server, if ((cifs_pdu == NULL) || (server == NULL)) return -EINVAL; + spin_lock(&cifs_tcp_ses_lock); if (!(cifs_pdu->Flags2 & SMBFLG2_SECURITY_SIGNATURE) || - server->tcpStatus == CifsNeedNegotiate) + server->tcpStatus == CifsNeedNegotiate) { + spin_unlock(&cifs_tcp_ses_lock); return rc; + } + spin_unlock(&cifs_tcp_ses_lock); if (!server->session_estab) { memcpy(cifs_pdu->Signature.SecuritySignature, "BSRSPYL", 8); diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index 23d76ae713f0..4ba35faff79c 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h @@ -586,7 +586,7 @@ struct TCP_Server_Info { char server_RFC1001_name[RFC1001_NAME_LEN_WITH_NULL]; struct smb_version_operations *ops; struct smb_version_values *vals; - /* updates to tcpStatus protected by GlobalMid_Lock */ + /* updates to tcpStatus protected by cifs_tcp_ses_lock */ enum statusEnum tcpStatus; /* what we think the status is */ char *hostname; /* hostname portion of UNC string */ struct socket *ssocket; @@ -924,7 +924,7 @@ struct cifs_ses { struct mutex session_mutex; struct TCP_Server_Info *server; /* pointer to server info */ int ses_count; /* reference counter */ - enum statusEnum status; /* updates protected by GlobalMid_Lock */ + enum statusEnum status; /* updates protected by cifs_tcp_ses_lock */ unsigned overrideSecFlg; /* if non-zero override global sec flags */ char *serverOS; /* name of operating system underlying server */ char *serverNOS; /* name of network operating system of server */ diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c index 7b1d0d71f3f1..3ef2796e2f24 100644 --- a/fs/cifs/cifssmb.c +++ b/fs/cifs/cifssmb.c @@ -120,15 +120,18 @@ cifs_reconnect_tcon(struct cifs_tcon *tcon, int smb_command) * only tree disconnect, open, and write, (and ulogoff which does not * have tcon) are allowed as we start force umount */ + spin_lock(&cifs_tcp_ses_lock); if (tcon->tidStatus == CifsExiting) { if (smb_command != SMB_COM_WRITE_ANDX && smb_command != SMB_COM_OPEN_ANDX && smb_command != SMB_COM_TREE_DISCONNECT) { + spin_unlock(&cifs_tcp_ses_lock); cifs_dbg(FYI, "can not send cmd %d while umounting\n", smb_command); return -ENODEV; } } + spin_unlock(&cifs_tcp_ses_lock); retries = server->nr_targets; @@ -148,8 +151,12 @@ cifs_reconnect_tcon(struct cifs_tcon *tcon, int smb_command) } /* are we still trying to reconnect? */ - if (server->tcpStatus != CifsNeedReconnect) + spin_lock(&cifs_tcp_ses_lock); + if (server->tcpStatus != CifsNeedReconnect) { + spin_unlock(&cifs_tcp_ses_lock); break; + } + spin_unlock(&cifs_tcp_ses_lock); if (retries && --retries) continue; @@ -186,11 +193,14 @@ cifs_reconnect_tcon(struct cifs_tcon *tcon, int smb_command) * and the server never sends an answer the socket will be closed * and tcpStatus set to reconnect. */ + spin_lock(&cifs_tcp_ses_lock); if (server->tcpStatus == CifsNeedReconnect) { + spin_unlock(&cifs_tcp_ses_lock); rc = -EHOSTDOWN; mutex_unlock(&ses->session_mutex); goto out; } + spin_unlock(&cifs_tcp_ses_lock); /* * need to prevent multiple threads trying to simultaneously diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 7b478f5db9d6..815f629933de 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -467,9 +467,12 @@ reconnect_dfs_server(struct TCP_Server_Info *server, dfs_cache_free_tgts(&tl); /* Need to set up echo worker again once connection has been established */ + spin_lock(&cifs_tcp_ses_lock); if (server->tcpStatus == CifsNeedNegotiate) mod_delayed_work(cifsiod_wq, &server->echo, 0); + spin_unlock(&cifs_tcp_ses_lock); + wake_up(&server->response_q); return rc; } @@ -571,15 +574,18 @@ server_unresponsive(struct TCP_Server_Info *server) * 65s kernel_recvmsg times out, and we see that we haven't gotten * a response in >60s. */ + spin_lock(&cifs_tcp_ses_lock); if ((server->tcpStatus == CifsGood || server->tcpStatus == CifsNeedNegotiate) && (!server->ops->can_echo || server->ops->can_echo(server)) && time_after(jiffies, server->lstrp + 3 * server->echo_interval)) { + spin_unlock(&cifs_tcp_ses_lock); cifs_server_dbg(VFS, "has not responded in %lu seconds. Reconnecting...\n", (3 * server->echo_interval) / HZ); cifs_reconnect(server, false); return true; } + spin_unlock(&cifs_tcp_ses_lock); return false; } @@ -624,13 +630,18 @@ cifs_readv_from_socket(struct TCP_Server_Info *server, struct msghdr *smb_msg) else length = sock_recvmsg(server->ssocket, smb_msg, 0); - if (server->tcpStatus == CifsExiting) + spin_lock(&cifs_tcp_ses_lock); + if (server->tcpStatus == CifsExiting) { + spin_unlock(&cifs_tcp_ses_lock); return -ESHUTDOWN; + } if (server->tcpStatus == CifsNeedReconnect) { + spin_unlock(&cifs_tcp_ses_lock); cifs_reconnect(server, false); return -ECONNABORTED; } + spin_unlock(&cifs_tcp_ses_lock); if (length == -ERESTARTSYS || length == -EAGAIN || @@ -808,9 +819,9 @@ static void clean_demultiplex_info(struct TCP_Server_Info *server) cancel_delayed_work_sync(&server->echo); cancel_delayed_work_sync(&server->resolve); - spin_lock(&GlobalMid_Lock); + spin_lock(&cifs_tcp_ses_lock); server->tcpStatus = CifsExiting; - spin_unlock(&GlobalMid_Lock); + spin_unlock(&cifs_tcp_ses_lock); wake_up_all(&server->response_q); /* check if we have blocked requests that need to free */ @@ -1427,9 +1438,9 @@ cifs_put_tcp_session(struct TCP_Server_Info *server, int from_reconnect) else cancel_delayed_work_sync(&server->reconnect); - spin_lock(&GlobalMid_Lock); + spin_lock(&cifs_tcp_ses_lock); server->tcpStatus = CifsExiting; - spin_unlock(&GlobalMid_Lock); + spin_unlock(&cifs_tcp_ses_lock); cifs_crypto_secmech_release(server); @@ -1582,7 +1593,9 @@ smbd_connected: * to the struct since the kernel thread not created yet * no need to spinlock this update of tcpStatus */ + spin_lock(&cifs_tcp_ses_lock); tcp_ses->tcpStatus = CifsNeedNegotiate; + spin_unlock(&cifs_tcp_ses_lock); if ((ctx->max_credits < 20) || (ctx->max_credits > 60000)) tcp_ses->max_credits = SMB2_MAX_CREDITS_AVAILABLE; @@ -1799,15 +1812,13 @@ void cifs_put_smb_ses(struct cifs_ses *ses) spin_unlock(&cifs_tcp_ses_lock); return; } - spin_unlock(&cifs_tcp_ses_lock); /* ses_count can never go negative */ WARN_ON(ses->ses_count < 0); - spin_lock(&GlobalMid_Lock); if (ses->status == CifsGood) ses->status = CifsExiting; - spin_unlock(&GlobalMid_Lock); + spin_unlock(&cifs_tcp_ses_lock); cifs_free_ipc(ses); @@ -3075,12 +3086,15 @@ static int mount_get_conns(struct mount_ctx *mnt_ctx) * for just this mount. */ reset_cifs_unix_caps(xid, tcon, cifs_sb, ctx); + spin_lock(&cifs_tcp_ses_lock); if ((tcon->ses->server->tcpStatus == CifsNeedReconnect) && (le64_to_cpu(tcon->fsUnixInfo.Capability) & CIFS_UNIX_TRANSPORT_ENCRYPTION_MANDATORY_CAP)) { + spin_unlock(&cifs_tcp_ses_lock); rc = -EACCES; goto out; } + spin_unlock(&cifs_tcp_ses_lock); } else tcon->unix_ext = 0; /* server does not support them */ @@ -3755,7 +3769,9 @@ CIFSTCon(const unsigned int xid, struct cifs_ses *ses, if (rc == 0) { bool is_unicode; + spin_lock(&cifs_tcp_ses_lock); tcon->tidStatus = CifsGood; + spin_unlock(&cifs_tcp_ses_lock); tcon->need_reconnect = false; tcon->tid = smb_buffer_response->Tid; bcc_ptr = pByteArea(smb_buffer_response); @@ -3859,12 +3875,12 @@ cifs_negotiate_protocol(const unsigned int xid, struct cifs_ses *ses, rc = server->ops->negotiate(xid, ses, server); if (rc == 0) { - spin_lock(&GlobalMid_Lock); + spin_lock(&cifs_tcp_ses_lock); if (server->tcpStatus == CifsNeedNegotiate) server->tcpStatus = CifsGood; else rc = -EHOSTDOWN; - spin_unlock(&GlobalMid_Lock); + spin_unlock(&cifs_tcp_ses_lock); } return rc; diff --git a/fs/cifs/netmisc.c b/fs/cifs/netmisc.c index fa9fbd6a819c..43b16b6d108c 100644 --- a/fs/cifs/netmisc.c +++ b/fs/cifs/netmisc.c @@ -896,10 +896,10 @@ map_and_check_smb_error(struct mid_q_entry *mid, bool logErr) if (class == ERRSRV && code == ERRbaduid) { cifs_dbg(FYI, "Server returned 0x%x, reconnecting session...\n", code); - spin_lock(&GlobalMid_Lock); + spin_lock(&cifs_tcp_ses_lock); if (mid->server->tcpStatus != CifsExiting) mid->server->tcpStatus = CifsNeedReconnect; - spin_unlock(&GlobalMid_Lock); + spin_unlock(&cifs_tcp_ses_lock); } } diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c index 61fc8cb1ec8f..03ba30843950 100644 --- a/fs/cifs/sess.c +++ b/fs/cifs/sess.c @@ -369,10 +369,10 @@ void cifs_ses_mark_for_reconnect(struct cifs_ses *ses) int i; for (i = 0; i < ses->chan_count; i++) { - spin_lock(&GlobalMid_Lock); + spin_lock(&cifs_tcp_ses_lock); if (ses->chans[i].server->tcpStatus != CifsExiting) ses->chans[i].server->tcpStatus = CifsNeedReconnect; - spin_unlock(&GlobalMid_Lock); + spin_unlock(&cifs_tcp_ses_lock); } } @@ -1052,9 +1052,9 @@ sess_establish_session(struct sess_data *sess_data) spin_unlock(&ses->chan_lock); /* Even if one channel is active, session is in good state */ - spin_lock(&GlobalMid_Lock); + spin_lock(&cifs_tcp_ses_lock); ses->status = CifsGood; - spin_unlock(&GlobalMid_Lock); + spin_unlock(&cifs_tcp_ses_lock); return 0; } diff --git a/fs/cifs/smb1ops.c b/fs/cifs/smb1ops.c index 5366202d343d..54319a789c92 100644 --- a/fs/cifs/smb1ops.c +++ b/fs/cifs/smb1ops.c @@ -163,7 +163,7 @@ cifs_get_next_mid(struct TCP_Server_Info *server) { __u64 mid = 0; __u16 last_mid, cur_mid; - bool collision; + bool collision, reconnect; spin_lock(&GlobalMid_Lock); @@ -215,7 +215,7 @@ cifs_get_next_mid(struct TCP_Server_Info *server) * an eventual reconnect to clean out the pending_mid_q. */ if (num_mids > 32768) - server->tcpStatus = CifsNeedReconnect; + reconnect = true; if (!collision) { mid = (__u64)cur_mid; @@ -225,6 +225,13 @@ cifs_get_next_mid(struct TCP_Server_Info *server) cur_mid++; } spin_unlock(&GlobalMid_Lock); + + if (reconnect) { + spin_lock(&cifs_tcp_ses_lock); + server->tcpStatus = CifsNeedReconnect; + spin_unlock(&cifs_tcp_ses_lock); + } + return mid; } diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c index b33b0f391a23..c2368c9110b0 100644 --- a/fs/cifs/smb2ops.c +++ b/fs/cifs/smb2ops.c @@ -121,9 +121,13 @@ smb2_add_credits(struct TCP_Server_Info *server, optype, scredits, add); } + spin_lock(&cifs_tcp_ses_lock); if (server->tcpStatus == CifsNeedReconnect - || server->tcpStatus == CifsExiting) + || server->tcpStatus == CifsExiting) { + spin_unlock(&cifs_tcp_ses_lock); return; + } + spin_unlock(&cifs_tcp_ses_lock); switch (rc) { case -1: @@ -208,11 +212,15 @@ smb2_wait_mtu_credits(struct TCP_Server_Info *server, unsigned int size, return rc; spin_lock(&server->req_lock); } else { + spin_unlock(&server->req_lock); + spin_lock(&cifs_tcp_ses_lock); if (server->tcpStatus == CifsExiting) { - spin_unlock(&server->req_lock); + spin_unlock(&cifs_tcp_ses_lock); return -ENOENT; } + spin_unlock(&cifs_tcp_ses_lock); + spin_lock(&server->req_lock); scredits = server->credits; /* can deadlock with reopen */ if (scredits <= 8) { @@ -4983,10 +4991,12 @@ static void smb2_decrypt_offload(struct work_struct *work) mid->callback(mid); } else { + spin_lock(&cifs_tcp_ses_lock); spin_lock(&GlobalMid_Lock); if (dw->server->tcpStatus == CifsNeedReconnect) { mid->mid_state = MID_RETRY_NEEDED; spin_unlock(&GlobalMid_Lock); + spin_unlock(&cifs_tcp_ses_lock); mid->callback(mid); } else { mid->mid_state = MID_REQUEST_SUBMITTED; @@ -4994,6 +5004,7 @@ static void smb2_decrypt_offload(struct work_struct *work) list_add_tail(&mid->qhead, &dw->server->pending_mid_q); spin_unlock(&GlobalMid_Lock); + spin_unlock(&cifs_tcp_ses_lock); } } cifs_mid_q_entry_release(mid); diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index 9e7b213dbef5..0c18b6f4f9eb 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -162,6 +162,7 @@ smb2_reconnect(__le16 smb2_command, struct cifs_tcon *tcon, if (smb2_command == SMB2_TREE_CONNECT || smb2_command == SMB2_IOCTL) return 0; + spin_lock(&cifs_tcp_ses_lock); if (tcon->tidStatus == CifsExiting) { /* * only tree disconnect, open, and write, @@ -171,11 +172,13 @@ smb2_reconnect(__le16 smb2_command, struct cifs_tcon *tcon, if ((smb2_command != SMB2_WRITE) && (smb2_command != SMB2_CREATE) && (smb2_command != SMB2_TREE_DISCONNECT)) { + spin_unlock(&cifs_tcp_ses_lock); cifs_dbg(FYI, "can not send cmd %d while umounting\n", smb2_command); return -ENODEV; } } + spin_unlock(&cifs_tcp_ses_lock); if ((!tcon->ses) || (tcon->ses->status == CifsExiting) || (!tcon->ses->server) || !server) return -EIO; @@ -214,8 +217,12 @@ smb2_reconnect(__le16 smb2_command, struct cifs_tcon *tcon, } /* are we still trying to reconnect? */ - if (server->tcpStatus != CifsNeedReconnect) + spin_lock(&cifs_tcp_ses_lock); + if (server->tcpStatus != CifsNeedReconnect) { + spin_unlock(&cifs_tcp_ses_lock); break; + } + spin_unlock(&cifs_tcp_ses_lock); if (retries && --retries) continue; @@ -255,11 +262,14 @@ smb2_reconnect(__le16 smb2_command, struct cifs_tcon *tcon, * and the server never sends an answer the socket will be closed * and tcpStatus set to reconnect. */ + spin_lock(&cifs_tcp_ses_lock); if (server->tcpStatus == CifsNeedReconnect) { + spin_unlock(&cifs_tcp_ses_lock); rc = -EHOSTDOWN; mutex_unlock(&ses->session_mutex); goto out; } + spin_unlock(&cifs_tcp_ses_lock); /* * need to prevent multiple threads trying to simultaneously @@ -1386,9 +1396,9 @@ SMB2_sess_establish_session(struct SMB2_sess_data *sess_data) spin_unlock(&ses->chan_lock); /* Even if one channel is active, session is in good state */ - spin_lock(&GlobalMid_Lock); + spin_lock(&cifs_tcp_ses_lock); ses->status = CifsGood; - spin_unlock(&GlobalMid_Lock); + spin_unlock(&cifs_tcp_ses_lock); return rc; } @@ -1917,7 +1927,9 @@ SMB2_tcon(const unsigned int xid, struct cifs_ses *ses, const char *tree, tcon->share_flags = le32_to_cpu(rsp->ShareFlags); tcon->capabilities = rsp->Capabilities; /* we keep caps little endian */ tcon->maximal_access = le32_to_cpu(rsp->MaximalAccess); + spin_lock(&cifs_tcp_ses_lock); tcon->tidStatus = CifsGood; + spin_unlock(&cifs_tcp_ses_lock); tcon->need_reconnect = false; tcon->tid = le32_to_cpu(rsp->hdr.Id.SyncId.TreeId); strlcpy(tcon->treeName, tree, sizeof(tcon->treeName)); @@ -3854,11 +3866,14 @@ SMB2_echo(struct TCP_Server_Info *server) cifs_dbg(FYI, "In echo request\n"); + spin_lock(&cifs_tcp_ses_lock); if (server->tcpStatus == CifsNeedNegotiate) { + spin_unlock(&cifs_tcp_ses_lock); /* No need to send echo on newly established connections */ mod_delayed_work(cifsiod_wq, &server->reconnect, 0); return rc; } + spin_unlock(&cifs_tcp_ses_lock); rc = smb2_plain_req_init(SMB2_ECHO, NULL, server, (void **)&req, &total_len); diff --git a/fs/cifs/smb2transport.c b/fs/cifs/smb2transport.c index 112adf153807..b70a49b4edc0 100644 --- a/fs/cifs/smb2transport.c +++ b/fs/cifs/smb2transport.c @@ -634,8 +634,12 @@ smb2_sign_rqst(struct smb_rqst *rqst, struct TCP_Server_Info *server) if (!is_signed) return 0; - if (server->tcpStatus == CifsNeedNegotiate) + spin_lock(&cifs_tcp_ses_lock); + if (server->tcpStatus == CifsNeedNegotiate) { + spin_unlock(&cifs_tcp_ses_lock); return 0; + } + spin_unlock(&cifs_tcp_ses_lock); if (!is_binding && !server->session_estab) { strncpy(shdr->Signature, "BSRSPYL", 8); return 0; @@ -751,30 +755,41 @@ static int smb2_get_mid_entry(struct cifs_ses *ses, struct TCP_Server_Info *server, struct smb2_hdr *shdr, struct mid_q_entry **mid) { - if (server->tcpStatus == CifsExiting) + spin_lock(&cifs_tcp_ses_lock); + if (server->tcpStatus == CifsExiting) { + spin_unlock(&cifs_tcp_ses_lock); return -ENOENT; + } if (server->tcpStatus == CifsNeedReconnect) { + spin_unlock(&cifs_tcp_ses_lock); cifs_dbg(FYI, "tcp session dead - return to caller to retry\n"); return -EAGAIN; } if (server->tcpStatus == CifsNeedNegotiate && - shdr->Command != SMB2_NEGOTIATE) + shdr->Command != SMB2_NEGOTIATE) { + spin_unlock(&cifs_tcp_ses_lock); return -EAGAIN; + } if (ses->status == CifsNew) { if ((shdr->Command != SMB2_SESSION_SETUP) && - (shdr->Command != SMB2_NEGOTIATE)) + (shdr->Command != SMB2_NEGOTIATE)) { + spin_unlock(&cifs_tcp_ses_lock); return -EAGAIN; + } /* else ok - we are setting up session */ } if (ses->status == CifsExiting) { - if (shdr->Command != SMB2_LOGOFF) + if (shdr->Command != SMB2_LOGOFF) { + spin_unlock(&cifs_tcp_ses_lock); return -EAGAIN; + } /* else ok - we are shutting down the session */ } + spin_unlock(&cifs_tcp_ses_lock); *mid = smb2_mid_entry_alloc(shdr, server); if (*mid == NULL) @@ -847,9 +862,13 @@ smb2_setup_async_request(struct TCP_Server_Info *server, struct smb_rqst *rqst) (struct smb2_hdr *)rqst->rq_iov[0].iov_base; struct mid_q_entry *mid; + spin_lock(&cifs_tcp_ses_lock); if (server->tcpStatus == CifsNeedNegotiate && - shdr->Command != SMB2_NEGOTIATE) + shdr->Command != SMB2_NEGOTIATE) { + spin_unlock(&cifs_tcp_ses_lock); return ERR_PTR(-EAGAIN); + } + spin_unlock(&cifs_tcp_ses_lock); smb2_seq_num_into_buf(server, shdr); diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c index 1d81681d9b97..1c400ca26383 100644 --- a/fs/cifs/transport.c +++ b/fs/cifs/transport.c @@ -430,9 +430,9 @@ unmask: * be taken as the remainder of this one. We need to kill the * socket so the server throws away the partial SMB */ - spin_lock(&GlobalMid_Lock); + spin_lock(&cifs_tcp_ses_lock); server->tcpStatus = CifsNeedReconnect; - spin_unlock(&GlobalMid_Lock); + spin_unlock(&cifs_tcp_ses_lock); trace_smb3_partial_send_reconnect(server->CurrentMid, server->conn_id, server->hostname); } @@ -578,10 +578,14 @@ wait_for_free_credits(struct TCP_Server_Info *server, const int num_credits, return -ERESTARTSYS; spin_lock(&server->req_lock); } else { + spin_unlock(&server->req_lock); + + spin_lock(&cifs_tcp_ses_lock); if (server->tcpStatus == CifsExiting) { - spin_unlock(&server->req_lock); + spin_unlock(&cifs_tcp_ses_lock); return -ENOENT; } + spin_unlock(&cifs_tcp_ses_lock); /* * For normal commands, reserve the last MAX_COMPOUND @@ -596,6 +600,7 @@ wait_for_free_credits(struct TCP_Server_Info *server, const int num_credits, * for servers that are slow to hand out credits on * new sessions. */ + spin_lock(&server->req_lock); if (!optype && num_credits == 1 && server->in_flight > 2 * MAX_COMPOUND && *credits <= MAX_COMPOUND) { @@ -723,28 +728,36 @@ cifs_wait_mtu_credits(struct TCP_Server_Info *server, unsigned int size, static int allocate_mid(struct cifs_ses *ses, struct smb_hdr *in_buf, struct mid_q_entry **ppmidQ) { + spin_lock(&cifs_tcp_ses_lock); if (ses->server->tcpStatus == CifsExiting) { + spin_unlock(&cifs_tcp_ses_lock); return -ENOENT; } if (ses->server->tcpStatus == CifsNeedReconnect) { + spin_unlock(&cifs_tcp_ses_lock); cifs_dbg(FYI, "tcp session dead - return to caller to retry\n"); return -EAGAIN; } if (ses->status == CifsNew) { if ((in_buf->Command != SMB_COM_SESSION_SETUP_ANDX) && - (in_buf->Command != SMB_COM_NEGOTIATE)) + (in_buf->Command != SMB_COM_NEGOTIATE)) { + spin_unlock(&cifs_tcp_ses_lock); return -EAGAIN; + } /* else ok - we are setting up session */ } if (ses->status == CifsExiting) { /* check if SMB session is bad because we are setting it up */ - if (in_buf->Command != SMB_COM_LOGOFF_ANDX) + if (in_buf->Command != SMB_COM_LOGOFF_ANDX) { + spin_unlock(&cifs_tcp_ses_lock); return -EAGAIN; + } /* else ok - we are shutting down session */ } + spin_unlock(&cifs_tcp_ses_lock); *ppmidQ = AllocMidQEntry(in_buf, ses->server); if (*ppmidQ == NULL) @@ -1085,8 +1098,12 @@ compound_send_recv(const unsigned int xid, struct cifs_ses *ses, return -EIO; } - if (server->tcpStatus == CifsExiting) + spin_lock(&cifs_tcp_ses_lock); + if (server->tcpStatus == CifsExiting) { + spin_unlock(&cifs_tcp_ses_lock); return -ENOENT; + } + spin_unlock(&cifs_tcp_ses_lock); /* * Wait for all the requests to become available. @@ -1189,11 +1206,17 @@ compound_send_recv(const unsigned int xid, struct cifs_ses *ses, /* * Compounding is never used during session establish. */ + spin_lock(&cifs_tcp_ses_lock); if ((ses->status == CifsNew) || (optype & CIFS_NEG_OP) || (optype & CIFS_SESS_OP)) { + spin_unlock(&cifs_tcp_ses_lock); + mutex_lock(&server->srv_mutex); smb311_update_preauth_hash(ses, server, rqst[0].rq_iov, rqst[0].rq_nvec); mutex_unlock(&server->srv_mutex); + + spin_lock(&cifs_tcp_ses_lock); } + spin_unlock(&cifs_tcp_ses_lock); for (i = 0; i < num_rqst; i++) { rc = wait_for_response(server, midQ[i]); @@ -1256,15 +1279,19 @@ compound_send_recv(const unsigned int xid, struct cifs_ses *ses, /* * Compounding is never used during session establish. */ + spin_lock(&cifs_tcp_ses_lock); if ((ses->status == CifsNew) || (optype & CIFS_NEG_OP) || (optype & CIFS_SESS_OP)) { struct kvec iov = { .iov_base = resp_iov[0].iov_base, .iov_len = resp_iov[0].iov_len }; + spin_unlock(&cifs_tcp_ses_lock); mutex_lock(&server->srv_mutex); smb311_update_preauth_hash(ses, server, &iov, 1); mutex_unlock(&server->srv_mutex); + spin_lock(&cifs_tcp_ses_lock); } + spin_unlock(&cifs_tcp_ses_lock); out: /* @@ -1353,8 +1380,12 @@ SendReceive(const unsigned int xid, struct cifs_ses *ses, return -EIO; } - if (server->tcpStatus == CifsExiting) + spin_lock(&cifs_tcp_ses_lock); + if (server->tcpStatus == CifsExiting) { + spin_unlock(&cifs_tcp_ses_lock); return -ENOENT; + } + spin_unlock(&cifs_tcp_ses_lock); /* Ensure that we do not send more than 50 overlapping requests to the same server. We may make this configurable later or @@ -1494,8 +1525,12 @@ SendReceiveBlockingLock(const unsigned int xid, struct cifs_tcon *tcon, return -EIO; } - if (server->tcpStatus == CifsExiting) + spin_lock(&cifs_tcp_ses_lock); + if (server->tcpStatus == CifsExiting) { + spin_unlock(&cifs_tcp_ses_lock); return -ENOENT; + } + spin_unlock(&cifs_tcp_ses_lock); /* Ensure that we do not send more than 50 overlapping requests to the same server. We may make this configurable later or @@ -1553,10 +1588,12 @@ SendReceiveBlockingLock(const unsigned int xid, struct cifs_tcon *tcon, (server->tcpStatus != CifsNew))); /* Were we interrupted by a signal ? */ + spin_lock(&cifs_tcp_ses_lock); if ((rc == -ERESTARTSYS) && (midQ->mid_state == MID_REQUEST_SUBMITTED) && ((server->tcpStatus == CifsGood) || (server->tcpStatus == CifsNew))) { + spin_unlock(&cifs_tcp_ses_lock); if (in_buf->Command == SMB_COM_TRANSACTION2) { /* POSIX lock. We send a NT_CANCEL SMB to cause the @@ -1595,7 +1632,9 @@ SendReceiveBlockingLock(const unsigned int xid, struct cifs_tcon *tcon, /* We got the response - restart system call. */ rstart = 1; + spin_lock(&cifs_tcp_ses_lock); } + spin_unlock(&cifs_tcp_ses_lock); rc = cifs_sync_mid_result(midQ, server); if (rc != 0) From 1913e1116a3174648cf2e6faedf29204f31cc438 Mon Sep 17 00:00:00 2001 From: Enzo Matsumiya Date: Fri, 7 Jan 2022 19:51:39 -0300 Subject: [PATCH 299/493] cifs: fix hang on cifs_get_next_mid() Mount will hang if using SMB1 and DFS. This is because every call to get_next_mid() will, unconditionally, mark tcpStatus to CifsNeedReconnect before even establishing the initial connect, because "reconnect" variable was not initialized. Initializing "reconnect" to false fix this issue. Fixes: 220c5bc25d87 ("cifs: take cifs_tcp_ses_lock for status checks") Signed-off-by: Enzo Matsumiya Signed-off-by: Steve French --- fs/cifs/smb1ops.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/cifs/smb1ops.c b/fs/cifs/smb1ops.c index 54319a789c92..6364c09296e8 100644 --- a/fs/cifs/smb1ops.c +++ b/fs/cifs/smb1ops.c @@ -163,7 +163,7 @@ cifs_get_next_mid(struct TCP_Server_Info *server) { __u64 mid = 0; __u16 last_mid, cur_mid; - bool collision, reconnect; + bool collision, reconnect = false; spin_lock(&GlobalMid_Lock); From 73f9bfbe3d818bb52266d5c9f3ba57d97842ffe7 Mon Sep 17 00:00:00 2001 From: Shyam Prasad N Date: Mon, 19 Jul 2021 17:37:52 +0000 Subject: [PATCH 300/493] cifs: maintain a state machine for tcp/smb/tcon sessions If functions like cifs_negotiate_protocol, cifs_setup_session, cifs_tree_connect are called in parallel on different channels, each of these will be execute the requests. This maybe unnecessary in some cases, and only the first caller may need to do the work. This is achieved by having more states for the tcp/smb/tcon session status fields. And tracking the state of reconnection based on the state machine. For example: for tcp connections: CifsNew/CifsNeedReconnect -> CifsNeedNegotiate -> CifsInNegotiate -> CifsNeedSessSetup -> CifsInSessSetup -> CifsGood for smb sessions: CifsNew/CifsNeedReconnect -> CifsGood for tcon: CifsNew/CifsNeedReconnect -> CifsInFilesInvalidate -> CifsNeedTcon -> CifsInTcon -> CifsGood If any channel reconnect sees that it's in the middle of transition to CifsGood, then they can skip the function. Signed-off-by: Shyam Prasad N Signed-off-by: Steve French --- fs/cifs/cifsglob.h | 8 +++++- fs/cifs/cifssmb.c | 24 ++++++++++++------ fs/cifs/connect.c | 61 +++++++++++++++++++++++++++++++++++++--------- fs/cifs/sess.c | 8 +++--- fs/cifs/smb2pdu.c | 16 ++++-------- 5 files changed, 82 insertions(+), 35 deletions(-) diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index 4ba35faff79c..f88d2b10045a 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h @@ -113,7 +113,13 @@ enum statusEnum { CifsGood, CifsExiting, CifsNeedReconnect, - CifsNeedNegotiate + CifsNeedNegotiate, + CifsInNegotiate, + CifsNeedSessSetup, + CifsInSessSetup, + CifsNeedTcon, + CifsInTcon, + CifsInFilesInvalidate }; enum securityEnum { diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c index 3ef2796e2f24..071e2f21a7db 100644 --- a/fs/cifs/cifssmb.c +++ b/fs/cifs/cifssmb.c @@ -73,6 +73,16 @@ cifs_mark_open_files_invalid(struct cifs_tcon *tcon) struct list_head *tmp; struct list_head *tmp1; + /* only send once per connect */ + spin_lock(&cifs_tcp_ses_lock); + if (tcon->ses->status != CifsGood || + tcon->tidStatus != CifsNeedReconnect) { + spin_unlock(&cifs_tcp_ses_lock); + return; + } + tcon->tidStatus = CifsInFilesInvalidate; + spin_unlock(&cifs_tcp_ses_lock); + /* list all files open on tree connection and mark them invalid */ spin_lock(&tcon->open_file_lock); list_for_each_safe(tmp, tmp1, &tcon->openFileList) { @@ -89,6 +99,11 @@ cifs_mark_open_files_invalid(struct cifs_tcon *tcon) memset(tcon->crfid.fid, 0, sizeof(struct cifs_fid)); mutex_unlock(&tcon->crfid.fid_mutex); + spin_lock(&cifs_tcp_ses_lock); + if (tcon->tidStatus == CifsInFilesInvalidate) + tcon->tidStatus = CifsNeedTcon; + spin_unlock(&cifs_tcp_ses_lock); + /* * BB Add call to invalidate_inodes(sb) for all superblocks mounted * to this tcon. @@ -182,12 +197,6 @@ cifs_reconnect_tcon(struct cifs_tcon *tcon, int smb_command) nls_codepage = load_nls_default(); - /* - * need to prevent multiple threads trying to simultaneously - * reconnect the same SMB session - */ - mutex_lock(&ses->session_mutex); - /* * Recheck after acquire mutex. If another thread is negotiating * and the server never sends an answer the socket will be closed @@ -197,7 +206,6 @@ cifs_reconnect_tcon(struct cifs_tcon *tcon, int smb_command) if (server->tcpStatus == CifsNeedReconnect) { spin_unlock(&cifs_tcp_ses_lock); rc = -EHOSTDOWN; - mutex_unlock(&ses->session_mutex); goto out; } spin_unlock(&cifs_tcp_ses_lock); @@ -215,11 +223,11 @@ cifs_reconnect_tcon(struct cifs_tcon *tcon, int smb_command) goto skip_sess_setup; rc = -EHOSTDOWN; - mutex_unlock(&ses->session_mutex); goto out; } spin_unlock(&ses->chan_lock); + mutex_lock(&ses->session_mutex); rc = cifs_negotiate_protocol(0, ses, server); if (!rc) rc = cifs_setup_session(0, ses, server, nls_codepage); diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 815f629933de..a408187c7002 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -208,10 +208,13 @@ cifs_mark_tcp_ses_conns_for_reconnect(struct TCP_Server_Info *server, if (!mark_smb_session && !CIFS_ALL_CHANS_NEED_RECONNECT(ses)) goto next_session; + ses->status = CifsNeedReconnect; num_sessions++; - list_for_each_entry(tcon, &ses->tcon_list, tcon_list) + list_for_each_entry(tcon, &ses->tcon_list, tcon_list) { tcon->need_reconnect = true; + tcon->tidStatus = CifsNeedReconnect; + } if (ses->tcon_ipc) ses->tcon_ipc->need_reconnect = true; @@ -2035,12 +2038,12 @@ cifs_get_smb_ses(struct TCP_Server_Info *server, struct smb3_fs_context *ctx) cifs_dbg(FYI, "Existing smb sess found (status=%d)\n", ses->status); - mutex_lock(&ses->session_mutex); spin_lock(&ses->chan_lock); if (cifs_chan_needs_reconnect(ses, server)) { spin_unlock(&ses->chan_lock); cifs_dbg(FYI, "Session needs reconnect\n"); + mutex_lock(&ses->session_mutex); rc = cifs_negotiate_protocol(xid, ses, server); if (rc) { mutex_unlock(&ses->session_mutex); @@ -2059,10 +2062,11 @@ cifs_get_smb_ses(struct TCP_Server_Info *server, struct smb3_fs_context *ctx) free_xid(xid); return ERR_PTR(rc); } + mutex_unlock(&ses->session_mutex); + spin_lock(&ses->chan_lock); } spin_unlock(&ses->chan_lock); - mutex_unlock(&ses->session_mutex); /* existing SMB ses has a server reference already */ cifs_put_tcp_session(server, 0); @@ -2112,7 +2116,6 @@ cifs_get_smb_ses(struct TCP_Server_Info *server, struct smb3_fs_context *ctx) ses->sectype = ctx->sectype; ses->sign = ctx->sign; - mutex_lock(&ses->session_mutex); /* add server as first channel */ spin_lock(&ses->chan_lock); @@ -2122,15 +2125,16 @@ cifs_get_smb_ses(struct TCP_Server_Info *server, struct smb3_fs_context *ctx) ses->chans_need_reconnect = 1; spin_unlock(&ses->chan_lock); + mutex_lock(&ses->session_mutex); rc = cifs_negotiate_protocol(xid, ses, server); if (!rc) rc = cifs_setup_session(xid, ses, server, ctx->local_nls); + mutex_unlock(&ses->session_mutex); /* each channel uses a different signing key */ memcpy(ses->chans[0].signkey, ses->smb3signingkey, sizeof(ses->smb3signingkey)); - mutex_unlock(&ses->session_mutex); if (rc) goto get_ses_fail; @@ -2347,10 +2351,6 @@ cifs_get_tcon(struct cifs_ses *ses, struct smb3_fs_context *ctx) } } - /* - * BB Do we need to wrap session_mutex around this TCon call and Unix - * SetFS as we do on SessSetup and reconnect? - */ xid = get_xid(); rc = ses->server->ops->tree_connect(xid, ses, ctx->UNC, tcon, ctx->local_nls); @@ -3870,14 +3870,20 @@ cifs_negotiate_protocol(const unsigned int xid, struct cifs_ses *ses, return -ENOSYS; /* only send once per connect */ - if (!server->ops->need_neg(server)) + spin_lock(&cifs_tcp_ses_lock); + if (!server->ops->need_neg(server) || + server->tcpStatus != CifsNeedNegotiate) { + spin_unlock(&cifs_tcp_ses_lock); return 0; + } + server->tcpStatus = CifsInNegotiate; + spin_unlock(&cifs_tcp_ses_lock); rc = server->ops->negotiate(xid, ses, server); if (rc == 0) { spin_lock(&cifs_tcp_ses_lock); - if (server->tcpStatus == CifsNeedNegotiate) - server->tcpStatus = CifsGood; + if (server->tcpStatus == CifsInNegotiate) + server->tcpStatus = CifsNeedSessSetup; else rc = -EHOSTDOWN; spin_unlock(&cifs_tcp_ses_lock); @@ -3894,6 +3900,15 @@ cifs_setup_session(const unsigned int xid, struct cifs_ses *ses, int rc = -ENOSYS; bool is_binding = false; + /* only send once per connect */ + spin_lock(&cifs_tcp_ses_lock); + if (server->tcpStatus != CifsNeedSessSetup) { + spin_unlock(&cifs_tcp_ses_lock); + return 0; + } + ses->status = CifsInSessSetup; + spin_unlock(&cifs_tcp_ses_lock); + spin_lock(&ses->chan_lock); is_binding = !CIFS_ALL_CHANS_NEED_RECONNECT(ses); spin_unlock(&ses->chan_lock); @@ -4264,6 +4279,17 @@ static int __tree_connect_dfs_target(const unsigned int xid, struct cifs_tcon *t struct dfs_cache_tgt_iterator *tit; bool target_match; + /* only send once per connect */ + spin_lock(&cifs_tcp_ses_lock); + if (tcon->ses->status != CifsGood || + (tcon->tidStatus != CifsNew && + tcon->tidStatus != CifsNeedTcon)) { + spin_unlock(&cifs_tcp_ses_lock); + return 0; + } + tcon->tidStatus = CifsInTcon; + spin_unlock(&cifs_tcp_ses_lock); + extract_unc_hostname(server->hostname, &tcp_host, &tcp_host_len); tit = dfs_cache_get_tgt_iterator(tl); @@ -4422,6 +4448,17 @@ int cifs_tree_connect(const unsigned int xid, struct cifs_tcon *tcon, const stru { const struct smb_version_operations *ops = tcon->ses->server->ops; + /* only send once per connect */ + spin_lock(&cifs_tcp_ses_lock); + if (tcon->ses->status != CifsGood || + (tcon->tidStatus != CifsNew && + tcon->tidStatus != CifsNeedTcon)) { + spin_unlock(&cifs_tcp_ses_lock); + return 0; + } + tcon->tidStatus = CifsInTcon; + spin_unlock(&cifs_tcp_ses_lock); + return ops->tree_connect(xid, tcon->ses, tcon->treeName, tcon, nlsc); } #endif diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c index 03ba30843950..d12490e12be5 100644 --- a/fs/cifs/sess.c +++ b/fs/cifs/sess.c @@ -308,7 +308,6 @@ cifs_ses_add_channel(struct cifs_sb_info *cifs_sb, struct cifs_ses *ses, chan_server = cifs_get_tcp_session(&ctx, ses->server); - mutex_lock(&ses->session_mutex); spin_lock(&ses->chan_lock); chan = &ses->chans[ses->chan_count]; chan->server = chan_server; @@ -326,6 +325,7 @@ cifs_ses_add_channel(struct cifs_sb_info *cifs_sb, struct cifs_ses *ses, spin_unlock(&ses->chan_lock); + mutex_lock(&ses->session_mutex); /* * We need to allocate the server crypto now as we will need * to sign packets before we generate the channel signing key @@ -334,6 +334,7 @@ cifs_ses_add_channel(struct cifs_sb_info *cifs_sb, struct cifs_ses *ses, rc = smb311_crypto_shash_allocate(chan->server); if (rc) { cifs_dbg(VFS, "%s: crypto alloc failed\n", __func__); + mutex_unlock(&ses->session_mutex); goto out; } @@ -341,6 +342,8 @@ cifs_ses_add_channel(struct cifs_sb_info *cifs_sb, struct cifs_ses *ses, if (!rc) rc = cifs_setup_session(xid, ses, chan->server, cifs_sb->local_nls); + mutex_unlock(&ses->session_mutex); + out: if (rc && chan->server) { spin_lock(&ses->chan_lock); @@ -355,8 +358,6 @@ out: spin_unlock(&ses->chan_lock); } - mutex_unlock(&ses->session_mutex); - if (rc && chan->server) cifs_put_tcp_session(chan->server, 0); @@ -1053,6 +1054,7 @@ sess_establish_session(struct sess_data *sess_data) /* Even if one channel is active, session is in good state */ spin_lock(&cifs_tcp_ses_lock); + server->tcpStatus = CifsGood; ses->status = CifsGood; spin_unlock(&cifs_tcp_ses_lock); diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index 0c18b6f4f9eb..2725e62470e4 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -251,12 +251,6 @@ smb2_reconnect(__le16 smb2_command, struct cifs_tcon *tcon, nls_codepage = load_nls_default(); - /* - * need to prevent multiple threads trying to simultaneously reconnect - * the same SMB session - */ - mutex_lock(&ses->session_mutex); - /* * Recheck after acquire mutex. If another thread is negotiating * and the server never sends an answer the socket will be closed @@ -266,7 +260,6 @@ smb2_reconnect(__le16 smb2_command, struct cifs_tcon *tcon, if (server->tcpStatus == CifsNeedReconnect) { spin_unlock(&cifs_tcp_ses_lock); rc = -EHOSTDOWN; - mutex_unlock(&ses->session_mutex); goto out; } spin_unlock(&cifs_tcp_ses_lock); @@ -284,23 +277,23 @@ smb2_reconnect(__le16 smb2_command, struct cifs_tcon *tcon, goto skip_sess_setup; rc = -EHOSTDOWN; - mutex_unlock(&ses->session_mutex); goto out; } spin_unlock(&ses->chan_lock); + mutex_lock(&ses->session_mutex); rc = cifs_negotiate_protocol(0, ses, server); if (!rc) { rc = cifs_setup_session(0, ses, server, nls_codepage); if ((rc == -EACCES) && !tcon->retry) { - rc = -EHOSTDOWN; mutex_unlock(&ses->session_mutex); + rc = -EHOSTDOWN; goto failed; } } if (rc || !tcon->need_reconnect) { - mutex_unlock(&tcon->ses->session_mutex); + mutex_unlock(&ses->session_mutex); goto out; } @@ -310,7 +303,7 @@ skip_sess_setup: tcon->need_reopen_files = true; rc = cifs_tree_connect(0, tcon, nls_codepage); - mutex_unlock(&tcon->ses->session_mutex); + mutex_unlock(&ses->session_mutex); cifs_dbg(FYI, "reconnect tcon rc = %d\n", rc); if (rc) { @@ -1397,6 +1390,7 @@ SMB2_sess_establish_session(struct SMB2_sess_data *sess_data) /* Even if one channel is active, session is in good state */ spin_lock(&cifs_tcp_ses_lock); + server->tcpStatus = CifsGood; ses->status = CifsGood; spin_unlock(&cifs_tcp_ses_lock); From bda487ac4bebf871255cc6f23e16f702cea0ca7c Mon Sep 17 00:00:00 2001 From: Shyam Prasad N Date: Mon, 25 Oct 2021 05:44:10 +0000 Subject: [PATCH 301/493] cifs: avoid race during socket reconnect between send and recv When a TCP connection gets reestablished by the sender in cifs_reconnect, There is a chance for race condition with demultiplex thread waiting in cifs_readv_from_socket on the old socket. It will now return -ECONNRESET. This condition is handled by comparing socket pointer before and after sock_recvmsg. If the socket pointer has changed, we should not call cifs_reconnect again, but instead retry with new socket. Also fixed another bug in my prev mchan commits. We should always reestablish session (even if binding) on a channel that needs reconnection. Signed-off-by: Shyam Prasad N Signed-off-by: Steve French --- fs/cifs/connect.c | 14 +++----------- fs/cifs/smb2pdu.c | 3 +-- fs/cifs/transport.c | 13 ++----------- 3 files changed, 6 insertions(+), 24 deletions(-) diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index a408187c7002..1dafaf7c4e5e 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -172,12 +172,11 @@ static void cifs_mark_tcp_ses_conns_for_reconnect(struct TCP_Server_Info *server, bool mark_smb_session) { - unsigned int num_sessions = 0; + struct TCP_Server_Info *pserver; struct cifs_ses *ses; struct cifs_tcon *tcon; struct mid_q_entry *mid, *nmid; struct list_head retry_list; - struct TCP_Server_Info *pserver; server->maxBuf = 0; server->max_read = 0; @@ -199,17 +198,13 @@ cifs_mark_tcp_ses_conns_for_reconnect(struct TCP_Server_Info *server, if (!mark_smb_session && cifs_chan_needs_reconnect(ses, server)) goto next_session; - if (mark_smb_session) - CIFS_SET_ALL_CHANS_NEED_RECONNECT(ses); - else - cifs_chan_set_need_reconnect(ses, server); + cifs_chan_set_need_reconnect(ses, server); /* If all channels need reconnect, then tcon needs reconnect */ if (!mark_smb_session && !CIFS_ALL_CHANS_NEED_RECONNECT(ses)) goto next_session; ses->status = CifsNeedReconnect; - num_sessions++; list_for_each_entry(tcon, &ses->tcon_list, tcon_list) { tcon->need_reconnect = true; @@ -223,16 +218,13 @@ next_session: } spin_unlock(&cifs_tcp_ses_lock); - if (num_sessions == 0) - return; /* * before reconnecting the tcp session, mark the smb session (uid) * and the tid bad so they are not used until reconnected */ - cifs_dbg(FYI, "%s: marking sessions and tcons for reconnect\n", + cifs_dbg(FYI, "%s: marking sessions and tcons for reconnect and tearing down socket\n", __func__); /* do not want to be sending data on a socket we are freeing */ - cifs_dbg(FYI, "%s: tearing down socket\n", __func__); mutex_lock(&server->srv_mutex); if (server->ssocket) { cifs_dbg(FYI, "State: 0x%x Flags: 0x%lx\n", server->ssocket->state, diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index 2725e62470e4..8d471df69c59 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -276,7 +276,6 @@ smb2_reconnect(__le16 smb2_command, struct cifs_tcon *tcon, if (tcon->need_reconnect) goto skip_sess_setup; - rc = -EHOSTDOWN; goto out; } spin_unlock(&ses->chan_lock); @@ -3858,7 +3857,7 @@ SMB2_echo(struct TCP_Server_Info *server) .rq_nvec = 1 }; unsigned int total_len; - cifs_dbg(FYI, "In echo request\n"); + cifs_dbg(FYI, "In echo request for conn_id %lld\n", server->conn_id); spin_lock(&cifs_tcp_ses_lock); if (server->tcpStatus == CifsNeedNegotiate) { diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c index 1c400ca26383..93f0e8c1ea23 100644 --- a/fs/cifs/transport.c +++ b/fs/cifs/transport.c @@ -1057,18 +1057,9 @@ struct TCP_Server_Info *cifs_pick_channel(struct cifs_ses *ses) if (!ses) return NULL; - spin_lock(&ses->chan_lock); /* round robin */ -pick_another: - if (ses->chan_count > 1 && - !CIFS_ALL_CHANS_NEED_RECONNECT(ses)) { - index = (uint)atomic_inc_return(&ses->chan_seq); - index %= ses->chan_count; - - if (CIFS_CHAN_NEEDS_RECONNECT(ses, index)) - goto pick_another; - } - spin_unlock(&ses->chan_lock); + index = (uint)atomic_inc_return(&ses->chan_seq); + index %= ses->chan_count; return ses->chans[index].server; } From 64aa8f4b6df107f46b6ac3e5331819824626b0af Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Thu, 6 Jan 2022 22:52:10 +0100 Subject: [PATCH 302/493] dmaengine: pch_dma: Remove usage of the deprecated "pci-dma-compat.h" API In [1], Christoph Hellwig has proposed to remove the wrappers in include/linux/pci-dma-compat.h. Some reasons why this API should be removed have been given by Julia Lawall in [2]. A coccinelle script has been used to perform the needed transformation. It can be found in [3]. [1]: https://lore.kernel.org/kernel-janitors/20200421081257.GA131897@infradead.org/ [2]: https://lore.kernel.org/kernel-janitors/alpine.DEB.2.22.394.2007120902170.2424@hadrien/ [3]: https://lore.kernel.org/kernel-janitors/20200716192821.321233-1-christophe.jaillet@wanadoo.fr/ Signed-off-by: Christophe JAILLET Reviewed-by: Arnd Bergmann Link: https://lore.kernel.org/r/b88f25f3d07be92dd75494dc129a85619afb1366.1641500561.git.christophe.jaillet@wanadoo.fr Signed-off-by: Vinod Koul --- drivers/dma/pch_dma.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/dma/pch_dma.c b/drivers/dma/pch_dma.c index 1da04112fcdb..c359decc07a3 100644 --- a/drivers/dma/pch_dma.c +++ b/drivers/dma/pch_dma.c @@ -835,7 +835,7 @@ static int pch_dma_probe(struct pci_dev *pdev, goto err_disable_pdev; } - err = pci_set_dma_mask(pdev, DMA_BIT_MASK(32)); + err = dma_set_mask(&pdev->dev, DMA_BIT_MASK(32)); if (err) { dev_err(&pdev->dev, "Cannot set proper DMA config\n"); goto err_free_res; From bbd0ff07ed12fda9dbd0cc5f239bb678a775833a Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Thu, 6 Jan 2022 12:25:10 -0600 Subject: [PATCH 303/493] dt-bindings: dma-controller: Split interrupt fields in example Best practice for multi-cell property values is to bracket each multi-cell value. Signed-off-by: Rob Herring Link: https://lore.kernel.org/r/20220106182518.1435497-2-robh@kernel.org Signed-off-by: Vinod Koul --- Documentation/devicetree/bindings/dma/dma-controller.yaml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/Documentation/devicetree/bindings/dma/dma-controller.yaml b/Documentation/devicetree/bindings/dma/dma-controller.yaml index 0043b91da95e..6d3727267fa8 100644 --- a/Documentation/devicetree/bindings/dma/dma-controller.yaml +++ b/Documentation/devicetree/bindings/dma/dma-controller.yaml @@ -24,10 +24,10 @@ examples: dma: dma-controller@48000000 { compatible = "ti,omap-sdma"; reg = <0x48000000 0x1000>; - interrupts = <0 12 0x4 - 0 13 0x4 - 0 14 0x4 - 0 15 0x4>; + interrupts = <0 12 0x4>, + <0 13 0x4>, + <0 14 0x4>, + <0 15 0x4>; #dma-cells = <1>; dma-channels = <32>; dma-requests = <127>; From 8a78050ee257c8d4292ea8a6b52bb9c894306b9b Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Sat, 8 Jan 2022 23:09:20 -0800 Subject: [PATCH 304/493] Input: axp20x-pek - revert "always register interrupt handlers" change The power button on Cherry Trail systems with an AXP288 PMIC is connected to both the power button pin of the PMIC as well as to a power button GPIO on the Cherry Trail SoC itself. This leads to double power button event reporting which is a problem. Since reporting power button presses through the PMIC is not supported on all PMICs used on Cherry Trail systems, we want to keep the GPIO power button events, so the axp20x-pek code checks for the presence of a GPIO power button and in that case does not register its input-device. On most systems the GPIO power button also can wake-up the system from suspend, so the axp20x-pek driver would also not register its interrupt handler. But on some systems there was a bug causing wakeup by the GPIO power button handler to not work. Commit 9747070c11d6 ("Input: axp20x-pek - always register interrupt handlers") was added as a work around for this registering the axp20x-pek interrupts, but not the input-device on Cherry Trail systems. In the mean time the root-cause of the GPIO power button wakeup events not working has been found and fixed by the "pinctrl: cherryview: Do not allow the same interrupt line to be used by 2 pins" patch, so this is no longer necessary. This reverts the workaround going back to only registering the interrupt handlers on systems where we also register the input-device. Signed-off-by: Hans de Goede Acked-by: Chen-Yu Tsai Link: https://lore.kernel.org/r/20220106111647.66520-1-hdegoede@redhat.com Signed-off-by: Dmitry Torokhov --- drivers/input/misc/axp20x-pek.c | 72 ++++++++++++++++----------------- 1 file changed, 35 insertions(+), 37 deletions(-) diff --git a/drivers/input/misc/axp20x-pek.c b/drivers/input/misc/axp20x-pek.c index 9c6386b2af33..c8f87df93a50 100644 --- a/drivers/input/misc/axp20x-pek.c +++ b/drivers/input/misc/axp20x-pek.c @@ -205,11 +205,8 @@ ATTRIBUTE_GROUPS(axp20x); static irqreturn_t axp20x_pek_irq(int irq, void *pwr) { - struct axp20x_pek *axp20x_pek = pwr; - struct input_dev *idev = axp20x_pek->input; - - if (!idev) - return IRQ_HANDLED; + struct input_dev *idev = pwr; + struct axp20x_pek *axp20x_pek = input_get_drvdata(idev); /* * The power-button is connected to ground so a falling edge (dbf) @@ -228,9 +225,22 @@ static irqreturn_t axp20x_pek_irq(int irq, void *pwr) static int axp20x_pek_probe_input_device(struct axp20x_pek *axp20x_pek, struct platform_device *pdev) { + struct axp20x_dev *axp20x = axp20x_pek->axp20x; struct input_dev *idev; int error; + axp20x_pek->irq_dbr = platform_get_irq_byname(pdev, "PEK_DBR"); + if (axp20x_pek->irq_dbr < 0) + return axp20x_pek->irq_dbr; + axp20x_pek->irq_dbr = regmap_irq_get_virq(axp20x->regmap_irqc, + axp20x_pek->irq_dbr); + + axp20x_pek->irq_dbf = platform_get_irq_byname(pdev, "PEK_DBF"); + if (axp20x_pek->irq_dbf < 0) + return axp20x_pek->irq_dbf; + axp20x_pek->irq_dbf = regmap_irq_get_virq(axp20x->regmap_irqc, + axp20x_pek->irq_dbf); + axp20x_pek->input = devm_input_allocate_device(&pdev->dev); if (!axp20x_pek->input) return -ENOMEM; @@ -245,6 +255,24 @@ static int axp20x_pek_probe_input_device(struct axp20x_pek *axp20x_pek, input_set_drvdata(idev, axp20x_pek); + error = devm_request_any_context_irq(&pdev->dev, axp20x_pek->irq_dbr, + axp20x_pek_irq, 0, + "axp20x-pek-dbr", idev); + if (error < 0) { + dev_err(&pdev->dev, "Failed to request dbr IRQ#%d: %d\n", + axp20x_pek->irq_dbr, error); + return error; + } + + error = devm_request_any_context_irq(&pdev->dev, axp20x_pek->irq_dbf, + axp20x_pek_irq, 0, + "axp20x-pek-dbf", idev); + if (error < 0) { + dev_err(&pdev->dev, "Failed to request dbf IRQ#%d: %d\n", + axp20x_pek->irq_dbf, error); + return error; + } + error = input_register_device(idev); if (error) { dev_err(&pdev->dev, "Can't register input device: %d\n", @@ -252,6 +280,8 @@ static int axp20x_pek_probe_input_device(struct axp20x_pek *axp20x_pek, return error; } + device_init_wakeup(&pdev->dev, true); + return 0; } @@ -309,18 +339,6 @@ static int axp20x_pek_probe(struct platform_device *pdev) axp20x_pek->axp20x = dev_get_drvdata(pdev->dev.parent); - axp20x_pek->irq_dbr = platform_get_irq_byname(pdev, "PEK_DBR"); - if (axp20x_pek->irq_dbr < 0) - return axp20x_pek->irq_dbr; - axp20x_pek->irq_dbr = regmap_irq_get_virq( - axp20x_pek->axp20x->regmap_irqc, axp20x_pek->irq_dbr); - - axp20x_pek->irq_dbf = platform_get_irq_byname(pdev, "PEK_DBF"); - if (axp20x_pek->irq_dbf < 0) - return axp20x_pek->irq_dbf; - axp20x_pek->irq_dbf = regmap_irq_get_virq( - axp20x_pek->axp20x->regmap_irqc, axp20x_pek->irq_dbf); - if (axp20x_pek_should_register_input(axp20x_pek, pdev)) { error = axp20x_pek_probe_input_device(axp20x_pek, pdev); if (error) @@ -329,26 +347,6 @@ static int axp20x_pek_probe(struct platform_device *pdev) axp20x_pek->info = (struct axp20x_info *)match->driver_data; - error = devm_request_any_context_irq(&pdev->dev, axp20x_pek->irq_dbr, - axp20x_pek_irq, 0, - "axp20x-pek-dbr", axp20x_pek); - if (error < 0) { - dev_err(&pdev->dev, "Failed to request dbr IRQ#%d: %d\n", - axp20x_pek->irq_dbr, error); - return error; - } - - error = devm_request_any_context_irq(&pdev->dev, axp20x_pek->irq_dbf, - axp20x_pek_irq, 0, - "axp20x-pek-dbf", axp20x_pek); - if (error < 0) { - dev_err(&pdev->dev, "Failed to request dbf IRQ#%d: %d\n", - axp20x_pek->irq_dbf, error); - return error; - } - - device_init_wakeup(&pdev->dev, true); - platform_set_drvdata(pdev, axp20x_pek); return 0; From fdbb8025263246d770b802567757871db7a05913 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Sat, 8 Jan 2022 23:21:55 -0800 Subject: [PATCH 305/493] dt-bindings: input/ts/zinitix: Convert to YAML, fix and extend This converts the Zinitix BT4xx and BT5xx touchscreen bindings to YAML, fix them up a bit and extends them. We list all the existing BT4xx and BT5xx components with compatible strings. These are all similar, use the same bindings and work in similar ways. We rename the supplies from the erroneous vdd/vddo to the actual supply names vcca/vdd as specified on the actual component. It is long established that supplies shall be named after the supply pin names of a component. The confusion probably stems from that in a certain product the rails to the component were named vdd/vddo. Drop some notes on how OS implementations should avoid confusion by first looking for vddo, and if that exists assume the legacy binding pair and otherwise use vcca/vdd. Add reset-gpios as sometimes manufacturers pulls a GPIO line to the reset line on the chip. Add optional touchscreen-fuzz-x and touchscreen-fuzz-y properties. Reviewed-by: Rob Herring Signed-off-by: Linus Walleij [Fixed dt_schema_check] Signed-off-by: Nikita Travkin Link: https://lore.kernel.org/r/20220106072840.36851-3-nikita@trvn.ru Signed-off-by: Dmitry Torokhov --- .../input/touchscreen/zinitix,bt400.yaml | 115 ++++++++++++++++++ .../bindings/input/touchscreen/zinitix.txt | 40 ------ 2 files changed, 115 insertions(+), 40 deletions(-) create mode 100644 Documentation/devicetree/bindings/input/touchscreen/zinitix,bt400.yaml delete mode 100644 Documentation/devicetree/bindings/input/touchscreen/zinitix.txt diff --git a/Documentation/devicetree/bindings/input/touchscreen/zinitix,bt400.yaml b/Documentation/devicetree/bindings/input/touchscreen/zinitix,bt400.yaml new file mode 100644 index 000000000000..b4e5ba7c0b49 --- /dev/null +++ b/Documentation/devicetree/bindings/input/touchscreen/zinitix,bt400.yaml @@ -0,0 +1,115 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/input/touchscreen/zinitix,bt400.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Zinitix BT4xx and BT5xx series touchscreen controller bindings + +description: The Zinitix BT4xx and BT5xx series of touchscreen controllers + are Korea-produced touchscreens with embedded microcontrollers. The + BT4xx series was produced 2010-2013 and the BT5xx series 2013-2014. + +maintainers: + - Michael Srba + - Linus Walleij + +allOf: + - $ref: touchscreen.yaml# + +properties: + $nodename: + pattern: "^touchscreen(@.*)?$" + + compatible: + enum: + - zinitix,bt402 + - zinitix,bt403 + - zinitix,bt404 + - zinitix,bt412 + - zinitix,bt413 + - zinitix,bt431 + - zinitix,bt432 + - zinitix,bt531 + - zinitix,bt532 + - zinitix,bt538 + - zinitix,bt541 + - zinitix,bt548 + - zinitix,bt554 + - zinitix,at100 + + reg: + description: I2C address on the I2C bus + + clock-frequency: + description: I2C client clock frequency, defined for host when using + the device on the I2C bus + minimum: 0 + maximum: 400000 + + interrupts: + description: Interrupt to host + maxItems: 1 + + vcca-supply: + description: Analog power supply regulator on the VCCA pin + + vdd-supply: + description: Digital power supply regulator on the VDD pin. + In older device trees this can be the accidental name for the analog + supply on the VCCA pin, and in that case the deprecated vddo-supply is + used for the digital power supply. + + vddo-supply: + description: Deprecated name for the digital power supply, use vdd-supply + as this reflects the real name of the pin. If this supply is present, + the vdd-supply represents VCCA instead of VDD. Implementers should first + check for this property, and if it is present assume that the vdd-supply + represents the analog supply. + deprecated: true + + reset-gpios: + description: Reset line for the touchscreen, should be tagged + as GPIO_ACTIVE_LOW + + zinitix,mode: + description: Mode of reporting touch points. Some modes may not work + with a particular ts firmware for unknown reasons. Available modes are + 1 and 2. Mode 2 is the default and preferred. + $ref: /schemas/types.yaml#/definitions/uint32 + enum: [1, 2] + + touchscreen-size-x: true + touchscreen-size-y: true + touchscreen-fuzz-x: true + touchscreen-fuzz-y: true + +additionalProperties: false + +required: + - compatible + - reg + - interrupts + - touchscreen-size-x + - touchscreen-size-y + +examples: + - | + #include + #include + i2c { + #address-cells = <1>; + #size-cells = <0>; + + touchscreen@20 { + compatible = "zinitix,bt541"; + reg = <0x20>; + interrupt-parent = <&gpio>; + interrupts = <13 IRQ_TYPE_EDGE_FALLING>; + vcca-supply = <®_vcca_tsp>; + vdd-supply = <®_vdd_tsp>; + touchscreen-size-x = <540>; + touchscreen-size-y = <960>; + zinitix,mode = <2>; + }; + }; diff --git a/Documentation/devicetree/bindings/input/touchscreen/zinitix.txt b/Documentation/devicetree/bindings/input/touchscreen/zinitix.txt deleted file mode 100644 index 446efb9f5f55..000000000000 --- a/Documentation/devicetree/bindings/input/touchscreen/zinitix.txt +++ /dev/null @@ -1,40 +0,0 @@ -Device tree bindings for Zinitx BT541 touchscreen controller - -Required properties: - - - compatible : Should be "zinitix,bt541" - - reg : I2C address of the chip. Should be 0x20 - - interrupts : Interrupt to which the chip is connected - -Optional properties: - - - vdd-supply : Analog power supply regulator on VCCA pin - - vddo-supply : Digital power supply regulator on VDD pin - - zinitix,mode : Mode of reporting touch points. Some modes may not work - with a particular ts firmware for unknown reasons. Available - modes are 1 and 2. Mode 2 is the default and preferred. - -The touchscreen-* properties are documented in touchscreen.txt in this -directory. - -Example: - - i2c@00000000 { - /* ... */ - - bt541@20 { - compatible = "zinitix,bt541"; - reg = <0x20>; - interrupt-parent = <&msmgpio>; - interrupts = <13 IRQ_TYPE_EDGE_FALLING>; - pinctrl-names = "default"; - pinctrl-0 = <&tsp_default>; - vdd-supply = <®_vdd_tsp>; - vddo-supply = <&pm8916_l6>; - touchscreen-size-x = <540>; - touchscreen-size-y = <960>; - zinitix,mode = <2>; - }; - - /* ... */ - }; From c54be0e32e54abdf7b89d56fe9edebc2f319acee Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Sat, 8 Jan 2022 23:23:10 -0800 Subject: [PATCH 306/493] Input: zinitix - handle proper supply names The supply names of the Zinitix touchscreen were a bit confused, the new bindings rectifies this. To deal with old and new devicetrees, first check if we have "vddo" and in case that exists assume the old supply names. Else go and look for the new ones. We cannot just get the regulators since we would get an OK and a dummy regulator: we need to check explicitly for the old supply name. Use struct device *dev as a local variable instead of the I2C client since the device is what we are actually obtaining the resources from. Signed-off-by: Linus Walleij [Slightly changed the legacy regulator detection] Signed-off-by: Nikita Travkin Link: https://lore.kernel.org/r/20220106072840.36851-4-nikita@trvn.ru Signed-off-by: Dmitry Torokhov --- drivers/input/touchscreen/zinitix.c | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) diff --git a/drivers/input/touchscreen/zinitix.c b/drivers/input/touchscreen/zinitix.c index b8d901099378..cf95be9e6db5 100644 --- a/drivers/input/touchscreen/zinitix.c +++ b/drivers/input/touchscreen/zinitix.c @@ -252,16 +252,27 @@ static int zinitix_init_touch(struct bt541_ts_data *bt541) static int zinitix_init_regulators(struct bt541_ts_data *bt541) { - struct i2c_client *client = bt541->client; + struct device *dev = &bt541->client->dev; int error; - bt541->supplies[0].supply = "vdd"; - bt541->supplies[1].supply = "vddo"; - error = devm_regulator_bulk_get(&client->dev, + /* + * Some older device trees have erroneous names for the regulators, + * so check if "vddo" is present and in that case use these names. + * Else use the proper supply names on the component. + */ + if (of_find_property(dev->of_node, "vddo-supply", NULL)) { + bt541->supplies[0].supply = "vdd"; + bt541->supplies[1].supply = "vddo"; + } else { + /* Else use the proper supply names */ + bt541->supplies[0].supply = "vcca"; + bt541->supplies[1].supply = "vdd"; + } + error = devm_regulator_bulk_get(dev, ARRAY_SIZE(bt541->supplies), bt541->supplies); if (error < 0) { - dev_err(&client->dev, "Failed to get regulators: %d\n", error); + dev_err(dev, "Failed to get regulators: %d\n", error); return error; } From 9df136b555221e8eb3f4e5d3958d8fe11783abcf Mon Sep 17 00:00:00 2001 From: Nikita Travkin Date: Sat, 8 Jan 2022 23:23:49 -0800 Subject: [PATCH 307/493] Input: zinitix - add compatible for bt532 Zinitix BT532 is another touch controller that seem to implement the same interface as an already supported BT541. Add it to the driver. Reviewed-by: Linus Walleij Signed-off-by: Nikita Travkin Link: https://lore.kernel.org/r/20220106072840.36851-5-nikita@trvn.ru Signed-off-by: Dmitry Torokhov --- drivers/input/touchscreen/zinitix.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/input/touchscreen/zinitix.c b/drivers/input/touchscreen/zinitix.c index cf95be9e6db5..cf7ee4c765a7 100644 --- a/drivers/input/touchscreen/zinitix.c +++ b/drivers/input/touchscreen/zinitix.c @@ -571,6 +571,7 @@ static SIMPLE_DEV_PM_OPS(zinitix_pm_ops, zinitix_suspend, zinitix_resume); #ifdef CONFIG_OF static const struct of_device_id zinitix_of_match[] = { + { .compatible = "zinitix,bt532" }, { .compatible = "zinitix,bt541" }, { } }; From 7bf2e4d5ca1c94a9b0f730498b3d01768a72dcbd Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 7 Jan 2022 10:34:07 +0300 Subject: [PATCH 308/493] ACPI: pfr_telemetry: Fix info leak in pfrt_log_ioctl() The "data_info" struct is copied to the user. It has a 4 byte struct hole after the last struct member so we need to memset that to avoid copying uninitialized stack data to the user. Fixes: b0013e037a8b ("ACPI: Introduce Platform Firmware Runtime Telemetry driver") Signed-off-by: Dan Carpenter Signed-off-by: Rafael J. Wysocki --- drivers/acpi/pfr_telemetry.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/acpi/pfr_telemetry.c b/drivers/acpi/pfr_telemetry.c index da50dd80192c..9abf350bd7a5 100644 --- a/drivers/acpi/pfr_telemetry.c +++ b/drivers/acpi/pfr_telemetry.c @@ -83,6 +83,7 @@ static int get_pfrt_log_data_info(struct pfrt_log_data_info *data_info, union acpi_object *out_obj, in_obj, in_buf; int ret = -EBUSY; + memset(data_info, 0, sizeof(*data_info)); memset(&in_obj, 0, sizeof(in_obj)); memset(&in_buf, 0, sizeof(in_buf)); in_obj.type = ACPI_TYPE_PACKAGE; From e3304c21357268ecbe156ed6247a03dc78d3fce4 Mon Sep 17 00:00:00 2001 From: Athira Rajeev Date: Fri, 3 Dec 2021 07:50:37 +0530 Subject: [PATCH 309/493] perf sort: Include global and local variants for p_stage_cyc sort key Sort key 'p_stage_cyc' is used to present the latency cycles spent in pipeline stages. perf has local 'p_stage_cyc' sort key to display this info. There is no global variant available for this sort key. The local variant shows latency in a single sample, whereas the global value will be useful to present the total latency (sum of latencies) in the hist entry. It represents the latency number multiplied by the number of samples. Add global ('p_stage_cyc') and local variant ('local_p_stage_cyc') for this sort key. Use 'local_p_stage_cyc' as default option for "mem" sort mode. Also add this to the list of dynamic sort keys and made the "dynamic_headers" and "arch_specific_sort_keys" as static. Reported-by: Namhyung Kim Signed-off-by: Athira Jajeev Tested-by: Nageswara R Sastry Cc: Jiri Olsa Cc: Kajol Jain Cc: Madhavan Srinivasan Cc: Michael Ellerman Cc: linuxppc-dev@lists.ozlabs.org Link: https://lore.kernel.org/r/20211203022038.48240-1-atrajeev@linux.vnet.ibm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/hist.c | 4 +++- tools/perf/util/hist.h | 3 ++- tools/perf/util/sort.c | 34 +++++++++++++++++++++++++--------- tools/perf/util/sort.h | 3 ++- 4 files changed, 32 insertions(+), 12 deletions(-) diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c index b776465e04ef..0a8033b09e28 100644 --- a/tools/perf/util/hist.c +++ b/tools/perf/util/hist.c @@ -211,7 +211,9 @@ void hists__calc_col_len(struct hists *hists, struct hist_entry *h) hists__new_col_len(hists, HISTC_MEM_BLOCKED, 10); hists__new_col_len(hists, HISTC_LOCAL_INS_LAT, 13); hists__new_col_len(hists, HISTC_GLOBAL_INS_LAT, 13); - hists__new_col_len(hists, HISTC_P_STAGE_CYC, 13); + hists__new_col_len(hists, HISTC_LOCAL_P_STAGE_CYC, 13); + hists__new_col_len(hists, HISTC_GLOBAL_P_STAGE_CYC, 13); + if (symbol_conf.nanosecs) hists__new_col_len(hists, HISTC_TIME, 16); else diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h index 621f35ae1efa..2a15e22fb89c 100644 --- a/tools/perf/util/hist.h +++ b/tools/perf/util/hist.h @@ -75,7 +75,8 @@ enum hist_column { HISTC_MEM_BLOCKED, HISTC_LOCAL_INS_LAT, HISTC_GLOBAL_INS_LAT, - HISTC_P_STAGE_CYC, + HISTC_LOCAL_P_STAGE_CYC, + HISTC_GLOBAL_P_STAGE_CYC, HISTC_NR_COLS, /* Last entry */ }; diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c index a111065b484e..e417e47f51b9 100644 --- a/tools/perf/util/sort.c +++ b/tools/perf/util/sort.c @@ -37,7 +37,7 @@ const char default_parent_pattern[] = "^sys_|^do_page_fault"; const char *parent_pattern = default_parent_pattern; const char *default_sort_order = "comm,dso,symbol"; const char default_branch_sort_order[] = "comm,dso_from,symbol_from,symbol_to,cycles"; -const char default_mem_sort_order[] = "local_weight,mem,sym,dso,symbol_daddr,dso_daddr,snoop,tlb,locked,blocked,local_ins_lat,p_stage_cyc"; +const char default_mem_sort_order[] = "local_weight,mem,sym,dso,symbol_daddr,dso_daddr,snoop,tlb,locked,blocked,local_ins_lat,local_p_stage_cyc"; const char default_top_sort_order[] = "dso,symbol"; const char default_diff_sort_order[] = "dso,symbol"; const char default_tracepoint_sort_order[] = "trace"; @@ -46,8 +46,8 @@ const char *field_order; regex_t ignore_callees_regex; int have_ignore_callees = 0; enum sort_mode sort__mode = SORT_MODE__NORMAL; -const char *dynamic_headers[] = {"local_ins_lat", "p_stage_cyc"}; -const char *arch_specific_sort_keys[] = {"p_stage_cyc"}; +static const char *const dynamic_headers[] = {"local_ins_lat", "ins_lat", "local_p_stage_cyc", "p_stage_cyc"}; +static const char *const arch_specific_sort_keys[] = {"local_p_stage_cyc", "p_stage_cyc"}; /* * Replaces all occurrences of a char used with the: @@ -1392,22 +1392,37 @@ struct sort_entry sort_global_ins_lat = { }; static int64_t -sort__global_p_stage_cyc_cmp(struct hist_entry *left, struct hist_entry *right) +sort__p_stage_cyc_cmp(struct hist_entry *left, struct hist_entry *right) { return left->p_stage_cyc - right->p_stage_cyc; } +static int hist_entry__global_p_stage_cyc_snprintf(struct hist_entry *he, char *bf, + size_t size, unsigned int width) +{ + return repsep_snprintf(bf, size, "%-*u", width, + he->p_stage_cyc * he->stat.nr_events); +} + + static int hist_entry__p_stage_cyc_snprintf(struct hist_entry *he, char *bf, size_t size, unsigned int width) { return repsep_snprintf(bf, size, "%-*u", width, he->p_stage_cyc); } -struct sort_entry sort_p_stage_cyc = { - .se_header = "Pipeline Stage Cycle", - .se_cmp = sort__global_p_stage_cyc_cmp, +struct sort_entry sort_local_p_stage_cyc = { + .se_header = "Local Pipeline Stage Cycle", + .se_cmp = sort__p_stage_cyc_cmp, .se_snprintf = hist_entry__p_stage_cyc_snprintf, - .se_width_idx = HISTC_P_STAGE_CYC, + .se_width_idx = HISTC_LOCAL_P_STAGE_CYC, +}; + +struct sort_entry sort_global_p_stage_cyc = { + .se_header = "Pipeline Stage Cycle", + .se_cmp = sort__p_stage_cyc_cmp, + .se_snprintf = hist_entry__global_p_stage_cyc_snprintf, + .se_width_idx = HISTC_GLOBAL_P_STAGE_CYC, }; struct sort_entry sort_mem_daddr_sym = { @@ -1858,7 +1873,8 @@ static struct sort_dimension common_sort_dimensions[] = { DIM(SORT_CODE_PAGE_SIZE, "code_page_size", sort_code_page_size), DIM(SORT_LOCAL_INS_LAT, "local_ins_lat", sort_local_ins_lat), DIM(SORT_GLOBAL_INS_LAT, "ins_lat", sort_global_ins_lat), - DIM(SORT_PIPELINE_STAGE_CYC, "p_stage_cyc", sort_p_stage_cyc), + DIM(SORT_LOCAL_PIPELINE_STAGE_CYC, "local_p_stage_cyc", sort_local_p_stage_cyc), + DIM(SORT_GLOBAL_PIPELINE_STAGE_CYC, "p_stage_cyc", sort_global_p_stage_cyc), }; #undef DIM diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h index 7b7145501933..f994261888e1 100644 --- a/tools/perf/util/sort.h +++ b/tools/perf/util/sort.h @@ -235,7 +235,8 @@ enum sort_type { SORT_CODE_PAGE_SIZE, SORT_LOCAL_INS_LAT, SORT_GLOBAL_INS_LAT, - SORT_PIPELINE_STAGE_CYC, + SORT_LOCAL_PIPELINE_STAGE_CYC, + SORT_GLOBAL_PIPELINE_STAGE_CYC, /* branch stack specific sort keys */ __SORT_BRANCH_STACK, From befee3775b6dabd7ec1bd8a44584f7f6f8fc8329 Mon Sep 17 00:00:00 2001 From: Athira Rajeev Date: Fri, 3 Dec 2021 07:50:38 +0530 Subject: [PATCH 310/493] perf powerpc: Update global/local variants for p_stage_cyc Update the arch_support_sort_key() function in powerpc to enable presenting local and global variants of sort key 'p_stage_cyc'. Update the "se_header" strings for these in arch_perf_header_entry() along with instruction latency. Reported-by: Namhyung Kim Signed-off-by: Athira Jajeev Tested-by: Nageswara R Sastry Cc: Jiri Olsa Cc: Kajol Jain Cc: Madhavan Srinivasan Cc: Michael Ellerman Cc: linuxppc-dev@lists.ozlabs.org Link: https://lore.kernel.org/r/20211203022038.48240-2-atrajeev@linux.vnet.ibm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/arch/powerpc/util/event.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/tools/perf/arch/powerpc/util/event.c b/tools/perf/arch/powerpc/util/event.c index 3bf441257466..cf430a4c55b9 100644 --- a/tools/perf/arch/powerpc/util/event.c +++ b/tools/perf/arch/powerpc/util/event.c @@ -40,8 +40,12 @@ const char *arch_perf_header_entry(const char *se_header) { if (!strcmp(se_header, "Local INSTR Latency")) return "Finish Cyc"; - else if (!strcmp(se_header, "Pipeline Stage Cycle")) + else if (!strcmp(se_header, "INSTR Latency")) + return "Global Finish_cyc"; + else if (!strcmp(se_header, "Local Pipeline Stage Cycle")) return "Dispatch Cyc"; + else if (!strcmp(se_header, "Pipeline Stage Cycle")) + return "Global Dispatch_cyc"; return se_header; } @@ -49,5 +53,7 @@ int arch_support_sort_key(const char *sort_key) { if (!strcmp(sort_key, "p_stage_cyc")) return 1; + if (!strcmp(sort_key, "local_p_stage_cyc")) + return 1; return 0; } From d5962fb7d69073bf68fb647531cfd4f0adf84be3 Mon Sep 17 00:00:00 2001 From: Dario Petrillo Date: Mon, 10 Jan 2022 00:44:41 +0100 Subject: [PATCH 311/493] perf annotate: Avoid TUI crash when navigating in the annotation of recursive functions In 'perf report', entering a recursive function from inside of itself (either directly of indirectly through some other function) results in calling symbol__annotate2 multiple() times, and freeing the whole disassembly when exiting from the innermost instance. The first issue causes the function's disassembly to be duplicated, and the latter a heap use-after-free (and crash) when trying to access the disassembly again. I reproduced the bug on perf 5.11.22 (Ubuntu 20.04.3 LTS) and 5.16.rc8 with the following testcase (compile with gcc recursive.c -o recursive). To reproduce: - perf record ./recursive - perf report - enter fibonacci and annotate it - move the cursor on one of the "callq fibonacci" instructions and press enter - at this point there will be two copies of the function in the disassembly - go back by pressing q, and perf will crash #include int fibonacci(int n) { if(n <= 2) return 1; return fibonacci(n-1) + fibonacci(n-2); } int main() { printf("%d\n", fibonacci(40)); } This patch addresses the issue by annotating a function and freeing the associated memory on exit only if no annotation is already present, so that a recursive function is only annotated on entry. Signed-off-by: Dario Petrillo Tested-by: Arnaldo Carvalho de Melo Cc: Alexander Shishkin Cc: Jiri Olsa Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: stable@kernel.org Link: http://lore.kernel.org/lkml/20220109234441.325106-1-dario.pk1@gmail.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/ui/browsers/annotate.c | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/tools/perf/ui/browsers/annotate.c b/tools/perf/ui/browsers/annotate.c index e81c2493efdf..44ba900828f6 100644 --- a/tools/perf/ui/browsers/annotate.c +++ b/tools/perf/ui/browsers/annotate.c @@ -966,6 +966,7 @@ int symbol__tui_annotate(struct map_symbol *ms, struct evsel *evsel, .opts = opts, }; int ret = -1, err; + int not_annotated = list_empty(¬es->src->source); if (sym == NULL) return -1; @@ -973,13 +974,15 @@ int symbol__tui_annotate(struct map_symbol *ms, struct evsel *evsel, if (ms->map->dso->annotate_warned) return -1; - err = symbol__annotate2(ms, evsel, opts, &browser.arch); - if (err) { - char msg[BUFSIZ]; - ms->map->dso->annotate_warned = true; - symbol__strerror_disassemble(ms, err, msg, sizeof(msg)); - ui__error("Couldn't annotate %s:\n%s", sym->name, msg); - goto out_free_offsets; + if (not_annotated) { + err = symbol__annotate2(ms, evsel, opts, &browser.arch); + if (err) { + char msg[BUFSIZ]; + ms->map->dso->annotate_warned = true; + symbol__strerror_disassemble(ms, err, msg, sizeof(msg)); + ui__error("Couldn't annotate %s:\n%s", sym->name, msg); + goto out_free_offsets; + } } ui_helpline__push("Press ESC to exit"); @@ -994,9 +997,11 @@ int symbol__tui_annotate(struct map_symbol *ms, struct evsel *evsel, ret = annotate_browser__run(&browser, evsel, hbt); - annotated_source__purge(notes->src); + if(not_annotated) + annotated_source__purge(notes->src); out_free_offsets: - zfree(¬es->offsets); + if(not_annotated) + zfree(¬es->offsets); return ret; } From b6e43dddaea3dbfa93327f986beb3ec5e8157c4c Mon Sep 17 00:00:00 2001 From: Qinghua Jin Date: Mon, 10 Jan 2022 12:46:05 -0800 Subject: [PATCH 312/493] Input: ti_am335x_tsc - fix a typo in a comment change 'postion' to 'position' Signed-off-by: Qinghua Jin Link: https://lore.kernel.org/r/20220106084215.355295-1-qhjin.dev@gmail.com Signed-off-by: Dmitry Torokhov --- drivers/input/touchscreen/ti_am335x_tsc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/input/touchscreen/ti_am335x_tsc.c b/drivers/input/touchscreen/ti_am335x_tsc.c index f4ef218bc1b8..f2fb6a9a1a57 100644 --- a/drivers/input/touchscreen/ti_am335x_tsc.c +++ b/drivers/input/touchscreen/ti_am335x_tsc.c @@ -318,7 +318,7 @@ static irqreturn_t titsc_irq(int irq, void *dev) /* * Calculate pressure using formula * Resistance(touch) = x plate resistance * - * x postion/4096 * ((z2 / z1) - 1) + * x position/4096 * ((z2 / z1) - 1) */ z = z1 - z2; z *= x; From 7f7b4236f2040d19df1ddaf30047128b41e78de7 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Fri, 17 Dec 2021 15:13:48 +0100 Subject: [PATCH 313/493] x86/PCI: Ignore E820 reservations for bridge windows on newer systems Some BIOS-es contain a bug where they add addresses which map to system RAM in the PCI host bridge window returned by the ACPI _CRS method, see commit 4dc2287c1805 ("x86: avoid E820 regions when allocating address space"). To work around this bug Linux excludes E820 reserved addresses when allocating addresses from the PCI host bridge window since 2010. Recently (2019) some systems have shown-up with E820 reservations which cover the entire _CRS returned PCI bridge memory window, causing all attempts to assign memory to PCI BARs which have not been setup by the BIOS to fail. For example here are the relevant dmesg bits from a Lenovo IdeaPad 3 15IIL 81WE: [mem 0x000000004bc50000-0x00000000cfffffff] reserved pci_bus 0000:00: root bus resource [mem 0x65400000-0xbfffffff window] The ACPI specifications appear to allow this new behavior: The relationship between E820 and ACPI _CRS is not really very clear. ACPI v6.3, sec 15, table 15-374, says AddressRangeReserved means: This range of addresses is in use or reserved by the system and is not to be included in the allocatable memory pool of the operating system's memory manager. and it may be used when: The address range is in use by a memory-mapped system device. Furthermore, sec 15.2 says: Address ranges defined for baseboard memory-mapped I/O devices, such as APICs, are returned as reserved. A PCI host bridge qualifies as a baseboard memory-mapped I/O device, and its apertures are in use and certainly should not be included in the general allocatable pool, so the fact that some BIOS-es reports the PCI aperture as "reserved" in E820 doesn't seem like a BIOS bug. So it seems that the excluding of E820 reserved addresses is a mistake. Ideally Linux would fully stop excluding E820 reserved addresses, but then the old systems this was added for will regress. Instead keep the old behavior for old systems, while ignoring the E820 reservations for any systems from now on. Old systems are defined here as BIOS year < 2018, this was chosen to make sure that E820 reservations will not be used on the currently affected systems, while at the same time also taking into account that the systems for which the E820 checking was originally added may have received BIOS updates for quite a while (esp. CVE related ones), giving them a more recent BIOS year then 2010. BugLink: https://bugzilla.kernel.org/show_bug.cgi?id=206459 BugLink: https://bugzilla.redhat.com/show_bug.cgi?id=1868899 BugLink: https://bugzilla.redhat.com/show_bug.cgi?id=1871793 BugLink: https://bugs.launchpad.net/bugs/1878279 BugLink: https://bugs.launchpad.net/bugs/1931715 BugLink: https://bugs.launchpad.net/bugs/1932069 BugLink: https://bugs.launchpad.net/bugs/1921649 Reviewed-by: Mika Westerberg Acked-by: Bjorn Helgaas Signed-off-by: Hans de Goede Signed-off-by: Rafael J. Wysocki --- arch/x86/kernel/resource.c | 23 ++++++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) diff --git a/arch/x86/kernel/resource.c b/arch/x86/kernel/resource.c index 9b9fb7882c20..9ae64f9af956 100644 --- a/arch/x86/kernel/resource.c +++ b/arch/x86/kernel/resource.c @@ -1,4 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 +#include #include #include @@ -23,11 +24,31 @@ static void resource_clip(struct resource *res, resource_size_t start, res->start = end + 1; } +/* + * Some BIOS-es contain a bug where they add addresses which map to + * system RAM in the PCI host bridge window returned by the ACPI _CRS + * method, see commit 4dc2287c1805 ("x86: avoid E820 regions when + * allocating address space"). To avoid this Linux by default excludes + * E820 reservations when allocating addresses since 2010. + * In 2019 some systems have shown-up with E820 reservations which cover + * the entire _CRS returned PCI host bridge window, causing all attempts + * to assign memory to PCI BARs to fail if Linux uses E820 reservations. + * + * Ideally Linux would fully stop using E820 reservations, but then + * the old systems this was added for will regress. + * Instead keep the old behavior for old systems, while ignoring the + * E820 reservations for any systems from now on. + */ static void remove_e820_regions(struct resource *avail) { - int i; + int i, year = dmi_get_bios_year(); struct e820_entry *entry; + if (year >= 2018) + return; + + pr_info_once("PCI: Removing E820 reservations from host bridge windows\n"); + for (i = 0; i < e820_table->nr_entries; i++) { entry = &e820_table->entries[i]; From 2cea3ec5b0099d0e9dd6752aa86e08bce38d6b32 Mon Sep 17 00:00:00 2001 From: Jiasheng Jiang Date: Fri, 7 Jan 2022 11:35:16 +0800 Subject: [PATCH 314/493] ACPI: APD: Check for NULL pointer after calling devm_ioremap() Because devres_alloc() may fail, devm_ioremap() may return NULL. Then, 'clk_data->base' will be assigned to clkdev->data->base in platform_device_register_data(). The PTR_ERR_OR_ZERO() check on clk_data does not cover 'base', so it is better to add an explicit check against NULL after updating it. Fixes: 3f4ba94e3615 ("ACPI: APD: Add AMD misc clock handler support") Signed-off-by: Jiasheng Jiang [ rjw: Changelog rewrite ] Signed-off-by: Rafael J. Wysocki --- drivers/acpi/acpi_apd.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/acpi/acpi_apd.c b/drivers/acpi/acpi_apd.c index 6e02448d15d9..9db6409ecb47 100644 --- a/drivers/acpi/acpi_apd.c +++ b/drivers/acpi/acpi_apd.c @@ -95,6 +95,8 @@ static int fch_misc_setup(struct apd_private_data *pdata) resource_size(rentry->res)); break; } + if (!clk_data->base) + return -ENOMEM; acpi_dev_free_resource_list(&resource_list); From ee3fe99ff0a27108ac38d9766ac0e92f5ec35692 Mon Sep 17 00:00:00 2001 From: Mark Langsdorf Date: Wed, 5 Jan 2022 11:47:14 -0600 Subject: [PATCH 315/493] ACPI: SPCR: check if table->serial_port.access_width is too wide If table->serial_port.access_width is more than 29, it causes undefined behavior when ACPI_ACCESS_BIT_WIDTH shifts it to (1 << ((size) + 2)): [ 0.000000] UBSAN: Undefined behaviour in drivers/acpi/spcr.c:114:11 [ 0.000000] shift exponent 102 is too large for 32-bit type 'int' Use the new ACPI_ACCESS_ defines to test that serial_port.access_width is less than 30 and set it to 6 if it is not. Signed-off-by: Mark Langsdorf Signed-off-by: Rafael J. Wysocki --- drivers/acpi/spcr.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/acpi/spcr.c b/drivers/acpi/spcr.c index 25c2d0be953e..d589543875b8 100644 --- a/drivers/acpi/spcr.c +++ b/drivers/acpi/spcr.c @@ -107,8 +107,13 @@ int __init acpi_parse_spcr(bool enable_earlycon, bool enable_console) pr_info("SPCR table version %d\n", table->header.revision); if (table->serial_port.space_id == ACPI_ADR_SPACE_SYSTEM_MEMORY) { - switch (ACPI_ACCESS_BIT_WIDTH(( - table->serial_port.access_width))) { + u32 bit_width = table->serial_port.access_width; + + if (bit_width > ACPI_ACCESS_BIT_MAX) { + pr_err("Unacceptable wide SPCR Access Width. Defaulting to byte size\n"); + bit_width = ACPI_ACCESS_BIT_DEFAULT; + } + switch (ACPI_ACCESS_BIT_WIDTH((bit_width))) { default: pr_err("Unexpected SPCR Access Width. Defaulting to byte size\n"); fallthrough; From 0d5924ec4b89613910366c890305e46821a31f01 Mon Sep 17 00:00:00 2001 From: Sanjay R Mehta Date: Fri, 17 Dec 2021 03:56:20 -0600 Subject: [PATCH 316/493] ntb_hw_amd: Add NTB PCI ID for new gen CPU Add NTB support for new generation of processor Signed-off-by: Sanjay R Mehta Signed-off-by: Jon Mason --- drivers/ntb/hw/amd/ntb_hw_amd.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/ntb/hw/amd/ntb_hw_amd.c b/drivers/ntb/hw/amd/ntb_hw_amd.c index 87847c380051..04550b1f984c 100644 --- a/drivers/ntb/hw/amd/ntb_hw_amd.c +++ b/drivers/ntb/hw/amd/ntb_hw_amd.c @@ -1321,6 +1321,8 @@ static const struct ntb_dev_data dev_data[] = { static const struct pci_device_id amd_ntb_pci_tbl[] = { { PCI_VDEVICE(AMD, 0x145b), (kernel_ulong_t)&dev_data[0] }, { PCI_VDEVICE(AMD, 0x148b), (kernel_ulong_t)&dev_data[1] }, + { PCI_VDEVICE(AMD, 0x14c0), (kernel_ulong_t)&dev_data[1] }, + { PCI_VDEVICE(AMD, 0x14c3), (kernel_ulong_t)&dev_data[1] }, { PCI_VDEVICE(HYGON, 0x145b), (kernel_ulong_t)&dev_data[0] }, { 0, } }; From e70dc094265c0418bbd895d9657611ac8d509a1c Mon Sep 17 00:00:00 2001 From: Yang Li Date: Wed, 22 Dec 2021 09:55:13 +0800 Subject: [PATCH 317/493] NTB/msi: Fix ntbm_msi_request_threaded_irq() kernel-doc comment Add the description of @msi_desc and change the @devname to @name in ntbm_msi_request_threaded_irq() kernel-doc comment to remove some warnings found by running scripts/kernel-doc, which is caused by using 'make W=1'. drivers/ntb/msi.c:285: warning: Function parameter or member 'name' not described in 'ntbm_msi_request_threaded_irq' drivers/ntb/msi.c:285: warning: Function parameter or member 'msi_desc' not described in 'ntbm_msi_request_threaded_irq' drivers/ntb/msi.c:285: warning: Excess function parameter 'devname' description in 'ntbm_msi_request_threaded_irq' Reported-by: Abaci Robot Signed-off-by: Yang Li Signed-off-by: Jon Mason --- drivers/ntb/msi.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/ntb/msi.c b/drivers/ntb/msi.c index 3f05cfbc73af..60953cf54af6 100644 --- a/drivers/ntb/msi.c +++ b/drivers/ntb/msi.c @@ -260,8 +260,9 @@ static int ntbm_msi_setup_callback(struct ntb_dev *ntb, struct msi_desc *entry, * @handler: Function to be called when the IRQ occurs * @thread_fn: Function to be called in a threaded interrupt context. NULL * for clients which handle everything in @handler - * @devname: An ascii name for the claiming device, dev_name(dev) if NULL + * @name: An ascii name for the claiming device, dev_name(dev) if NULL * @dev_id: A cookie passed back to the handler function + * @msi_desc: MSI descriptor data which triggers the interrupt * * This function assigns an interrupt handler to an unused * MSI interrupt and returns the descriptor used to trigger From 78c5335b1aa6a2ba60dbad7e5bf10b3ef517c18a Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Wed, 22 Dec 2021 16:48:02 -0800 Subject: [PATCH 318/493] ntb_hw_switchtec: fix the spelling of "its" Use the possessive "its" instead of the contraction "it's" (it is) in user messages. Signed-off-by: Randy Dunlap Cc: Kurt Schwemmer Reviewed-by: Logan Gunthorpe Signed-off-by: Jon Mason --- drivers/ntb/hw/mscc/ntb_hw_switchtec.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/ntb/hw/mscc/ntb_hw_switchtec.c b/drivers/ntb/hw/mscc/ntb_hw_switchtec.c index 4c6eb61a6ac6..b2d956c2d610 100644 --- a/drivers/ntb/hw/mscc/ntb_hw_switchtec.c +++ b/drivers/ntb/hw/mscc/ntb_hw_switchtec.c @@ -297,7 +297,7 @@ static int switchtec_ntb_mw_set_trans(struct ntb_dev *ntb, int pidx, int widx, * (see CMA_CONFIG_ALIGNMENT) */ dev_err(&sndev->stdev->dev, - "ERROR: Memory window address is not aligned to it's size!\n"); + "ERROR: Memory window address is not aligned to its size!\n"); return -EINVAL; } From 32c3d375b0ed84b6acb51ae5ebef35ff0d649d85 Mon Sep 17 00:00:00 2001 From: Jeremy Pallotta Date: Thu, 23 Dec 2021 17:23:29 -0800 Subject: [PATCH 319/493] ntb_hw_switchtec: Fix pff ioread to read into mmio_part_cfg_all Array mmio_part_cfg_all holds the partition configuration of all partitions, with partition number as index. Fix this by reading into mmio_part_cfg_all for pff. Fixes: 0ee28f26f378 ("NTB: switchtec_ntb: Add link management") Signed-off-by: Jeremy Pallotta Signed-off-by: Kelvin Cao Signed-off-by: Jon Mason --- drivers/ntb/hw/mscc/ntb_hw_switchtec.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/ntb/hw/mscc/ntb_hw_switchtec.c b/drivers/ntb/hw/mscc/ntb_hw_switchtec.c index b2d956c2d610..2f5b2b5e75e4 100644 --- a/drivers/ntb/hw/mscc/ntb_hw_switchtec.c +++ b/drivers/ntb/hw/mscc/ntb_hw_switchtec.c @@ -419,8 +419,8 @@ static void switchtec_ntb_part_link_speed(struct switchtec_ntb *sndev, enum ntb_width *width) { struct switchtec_dev *stdev = sndev->stdev; - - u32 pff = ioread32(&stdev->mmio_part_cfg[partition].vep_pff_inst_id); + u32 pff = + ioread32(&stdev->mmio_part_cfg_all[partition].vep_pff_inst_id); u32 linksta = ioread32(&stdev->mmio_pff_csr[pff].pci_cap_region[13]); if (speed) From 7ff351c86b6b258f387502ab2c9b9d04f82c1c3d Mon Sep 17 00:00:00 2001 From: Wesley Sheng Date: Thu, 23 Dec 2021 17:23:30 -0800 Subject: [PATCH 320/493] ntb_hw_switchtec: Fix bug with more than 32 partitions Switchtec could support as mush as 48 partitions, but ffs & fls are for 32 bit argument, in case of partition index larger than 31, the current code could not parse the peer partition index correctly. Change to the 64 bit version __ffs64 & fls64 accordingly to fix this bug. Fixes: 3df54c870f52 ("ntb_hw_switchtec: Allow using Switchtec NTB in multi-partition setups") Signed-off-by: Wesley Sheng Signed-off-by: Kelvin Cao Signed-off-by: Jon Mason --- drivers/ntb/hw/mscc/ntb_hw_switchtec.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/drivers/ntb/hw/mscc/ntb_hw_switchtec.c b/drivers/ntb/hw/mscc/ntb_hw_switchtec.c index 2f5b2b5e75e4..36efba21db4b 100644 --- a/drivers/ntb/hw/mscc/ntb_hw_switchtec.c +++ b/drivers/ntb/hw/mscc/ntb_hw_switchtec.c @@ -840,7 +840,6 @@ static int switchtec_ntb_init_sndev(struct switchtec_ntb *sndev) u64 tpart_vec; int self; u64 part_map; - int bit; sndev->ntb.pdev = sndev->stdev->pdev; sndev->ntb.topo = NTB_TOPO_SWITCH; @@ -861,29 +860,28 @@ static int switchtec_ntb_init_sndev(struct switchtec_ntb *sndev) part_map = ioread64(&sndev->mmio_ntb->ep_map); part_map &= ~(1 << sndev->self_partition); - if (!ffs(tpart_vec)) { + if (!tpart_vec) { if (sndev->stdev->partition_count != 2) { dev_err(&sndev->stdev->dev, "ntb target partition not defined\n"); return -ENODEV; } - bit = ffs(part_map); - if (!bit) { + if (!part_map) { dev_err(&sndev->stdev->dev, "peer partition is not NT partition\n"); return -ENODEV; } - sndev->peer_partition = bit - 1; + sndev->peer_partition = __ffs64(part_map); } else { - if (ffs(tpart_vec) != fls(tpart_vec)) { + if (__ffs64(tpart_vec) != (fls64(tpart_vec) - 1)) { dev_err(&sndev->stdev->dev, "ntb driver only supports 1 pair of 1-1 ntb mapping\n"); return -ENODEV; } - sndev->peer_partition = ffs(tpart_vec) - 1; + sndev->peer_partition = __ffs64(tpart_vec); if (!(part_map & (1ULL << sndev->peer_partition))) { dev_err(&sndev->stdev->dev, "ntb target partition is not NT partition\n"); From 857e239c3ef57e6ba0ff148de96e14249a4d669b Mon Sep 17 00:00:00 2001 From: Jeremy Pallotta Date: Thu, 23 Dec 2021 17:23:31 -0800 Subject: [PATCH 321/493] ntb_hw_switchtec: AND with the part_map for a valid tpart_vec Some firmware versions return 1 in the target partition vector for undefined partitions. AND with the part_map to give a valid tpart_vec. Signed-off-by: Jeremy Pallotta Signed-off-by: Kelvin Cao Signed-off-by: Jon Mason --- drivers/ntb/hw/mscc/ntb_hw_switchtec.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/ntb/hw/mscc/ntb_hw_switchtec.c b/drivers/ntb/hw/mscc/ntb_hw_switchtec.c index 36efba21db4b..8e9b97631884 100644 --- a/drivers/ntb/hw/mscc/ntb_hw_switchtec.c +++ b/drivers/ntb/hw/mscc/ntb_hw_switchtec.c @@ -858,6 +858,7 @@ static int switchtec_ntb_init_sndev(struct switchtec_ntb *sndev) tpart_vec |= ioread32(&sndev->mmio_ntb->ntp_info[self].target_part_low); part_map = ioread64(&sndev->mmio_ntb->ep_map); + tpart_vec &= part_map; part_map &= ~(1 << sndev->self_partition); if (!tpart_vec) { From 2f58265e163df2dc3f49118bcb38771b66f7e979 Mon Sep 17 00:00:00 2001 From: Kelvin Cao Date: Thu, 23 Dec 2021 17:23:32 -0800 Subject: [PATCH 322/493] ntb_hw_switchtec: Update the way of getting VEP instance ID Gen4 firmware adds DMA VEP and NVMe VEP support in VEP (virtual EP) instance ID register in addtion to management EP. Update the way of getting management VEP instance ID. Signed-off-by: Kelvin Cao Signed-off-by: Jon Mason --- drivers/ntb/hw/mscc/ntb_hw_switchtec.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/ntb/hw/mscc/ntb_hw_switchtec.c b/drivers/ntb/hw/mscc/ntb_hw_switchtec.c index 8e9b97631884..ba6a9670c681 100644 --- a/drivers/ntb/hw/mscc/ntb_hw_switchtec.c +++ b/drivers/ntb/hw/mscc/ntb_hw_switchtec.c @@ -419,8 +419,10 @@ static void switchtec_ntb_part_link_speed(struct switchtec_ntb *sndev, enum ntb_width *width) { struct switchtec_dev *stdev = sndev->stdev; - u32 pff = - ioread32(&stdev->mmio_part_cfg_all[partition].vep_pff_inst_id); + struct part_cfg_regs __iomem *part_cfg = + &stdev->mmio_part_cfg_all[partition]; + + u32 pff = ioread32(&part_cfg->vep_pff_inst_id) & 0xFF; u32 linksta = ioread32(&stdev->mmio_pff_csr[pff].pci_cap_region[13]); if (speed) @@ -1089,7 +1091,7 @@ static int crosslink_enum_partition(struct switchtec_ntb *sndev, { struct part_cfg_regs __iomem *part_cfg = &sndev->stdev->mmio_part_cfg_all[sndev->peer_partition]; - u32 pff = ioread32(&part_cfg->vep_pff_inst_id); + u32 pff = ioread32(&part_cfg->vep_pff_inst_id) & 0xFF; struct pff_csr_regs __iomem *mmio_pff = &sndev->stdev->mmio_pff_csr[pff]; const u64 bar_space = 0x1000000000LL; From 1d3cfc2835c1754d19a743dc346a9e58cf0c07c0 Mon Sep 17 00:00:00 2001 From: Kelvin Cao Date: Thu, 23 Dec 2021 17:23:33 -0800 Subject: [PATCH 323/493] ntb_hw_switchtec: Remove code for disabling ID protection ID protection is a firmware setting for NT window access control. With it enabled, only the posted requests with requester IDs in the requester ID table will be allowed to access the NT windows. Otherwise all posted requests are allowed. Normally user will configure it statically via the Switchtec config file, and it will take effect when the firmware boots up. The driver can also toggle the ID protection setting dynamically, which will overwrite the static setting in the Switchtec config file as a side effect. Currently, the driver disables the ID protection. However, it's not necessary to disable the ID protection at the driver level as the driver has already configured the proper requester IDs in the requester ID table to allow the corresponding posted requests to hit the NT windows. Remove the code that disables the ID protection to make the static setting prevail. Note: ID protection is not applicable to non-posted requests. Signed-off-by: Kelvin Cao Signed-off-by: Jon Mason --- drivers/ntb/hw/mscc/ntb_hw_switchtec.c | 3 --- include/linux/switchtec.h | 2 -- 2 files changed, 5 deletions(-) diff --git a/drivers/ntb/hw/mscc/ntb_hw_switchtec.c b/drivers/ntb/hw/mscc/ntb_hw_switchtec.c index ba6a9670c681..e79a355bafbf 100644 --- a/drivers/ntb/hw/mscc/ntb_hw_switchtec.c +++ b/drivers/ntb/hw/mscc/ntb_hw_switchtec.c @@ -967,9 +967,6 @@ static int config_req_id_table(struct switchtec_ntb *sndev, if (rc) return rc; - iowrite32(NTB_PART_CTRL_ID_PROT_DIS, - &mmio_ctrl->partition_ctrl); - for (i = 0; i < count; i++) { iowrite32(req_ids[i] << 16 | NTB_CTRL_REQ_ID_EN, &mmio_ctrl->req_id_table[i]); diff --git a/include/linux/switchtec.h b/include/linux/switchtec.h index be24056ac00f..48fabe36509e 100644 --- a/include/linux/switchtec.h +++ b/include/linux/switchtec.h @@ -337,8 +337,6 @@ enum { NTB_CTRL_REQ_ID_EN = 1 << 0, NTB_CTRL_LUT_EN = 1 << 0, - - NTB_PART_CTRL_ID_PROT_DIS = 1 << 0, }; struct ntb_ctrl_regs { From 8cd778650ae223cd306588042b55d0290ef81037 Mon Sep 17 00:00:00 2001 From: Kelvin Cao Date: Thu, 23 Dec 2021 17:23:34 -0800 Subject: [PATCH 324/493] ntb_hw_switchtec: Fix a minor issue in config_req_id_table() The req_id_table_size field is 16-bit wide, use ioread16() to read the value. Signed-off-by: Kelvin Cao Signed-off-by: Jon Mason --- drivers/ntb/hw/mscc/ntb_hw_switchtec.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/ntb/hw/mscc/ntb_hw_switchtec.c b/drivers/ntb/hw/mscc/ntb_hw_switchtec.c index e79a355bafbf..88ae18b0efa8 100644 --- a/drivers/ntb/hw/mscc/ntb_hw_switchtec.c +++ b/drivers/ntb/hw/mscc/ntb_hw_switchtec.c @@ -955,7 +955,7 @@ static int config_req_id_table(struct switchtec_ntb *sndev, u32 error; u32 proxy_id; - if (ioread32(&mmio_ctrl->req_id_table_size) < count) { + if (ioread16(&mmio_ctrl->req_id_table_size) < count) { dev_err(&sndev->stdev->dev, "Not enough requester IDs available.\n"); return -EFAULT; From 0046686da0ef692a6381260c3aa44291187eafc9 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Thu, 23 Dec 2021 10:39:47 -0800 Subject: [PATCH 325/493] perf test: Enable system wide for metricgroups test Uncore events as group leaders fail in per-thread mode causing exit errors. Enable system-wide for metricgroup testing. This fixes the HPC metric group when tested on skylakex. Fixes: 4a87dea9e60fe100 ("perf test: Workload test of metric and metricgroups") Signed-off-by: Ian Rogers Tested-by: Arnaldo Carvalho de Melo Cc: Alexander Shishkin Cc: Andi Kleen Cc: Ingo Molnar Cc: Jiri Olsa Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Link: https://lore.kernel.org/r/20211223183948.3423989-1-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/shell/stat_all_metricgroups.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/tests/shell/stat_all_metricgroups.sh b/tools/perf/tests/shell/stat_all_metricgroups.sh index de24d374ce24..cb35e488809a 100755 --- a/tools/perf/tests/shell/stat_all_metricgroups.sh +++ b/tools/perf/tests/shell/stat_all_metricgroups.sh @@ -6,7 +6,7 @@ set -e for m in $(perf list --raw-dump metricgroups); do echo "Testing $m" - perf stat -M "$m" true + perf stat -M "$m" -a true done exit 0 From b6c55b162bcee62c43c18e59f38a4590be543032 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Tue, 11 Jan 2022 17:50:22 +0100 Subject: [PATCH 326/493] ACPI: scan: Change acpi_scan_init() return value type to void The only caller of acpi_scan_init(), acpi_init(), doesn't check its return value, so turn it into a void function. This avoids complaints from the Smatch static checker that the function should return a negative error code when it fails, which is not really a problem in this particular case. No intentional functional impact. Link: https://lore.kernel.org/linux-acpi/20220106082317.GA9123@kili/ Signed-off-by: Rafael J. Wysocki Reviewed-by: Mika Westerberg Reviewed-by: Hans de Goede --- drivers/acpi/internal.h | 2 +- drivers/acpi/scan.c | 10 +++------- 2 files changed, 4 insertions(+), 8 deletions(-) diff --git a/drivers/acpi/internal.h b/drivers/acpi/internal.h index 1db3a2f81763..457e11d851b8 100644 --- a/drivers/acpi/internal.h +++ b/drivers/acpi/internal.h @@ -14,7 +14,7 @@ int early_acpi_osi_init(void); int acpi_osi_init(void); acpi_status acpi_os_initialize1(void); -int acpi_scan_init(void); +void acpi_scan_init(void); #ifdef CONFIG_PCI void acpi_pci_root_init(void); void acpi_pci_link_init(void); diff --git a/drivers/acpi/scan.c b/drivers/acpi/scan.c index 1185ecea59d1..7e7417f4df22 100644 --- a/drivers/acpi/scan.c +++ b/drivers/acpi/scan.c @@ -2542,9 +2542,8 @@ static void __init acpi_get_spcr_uart_addr(void) static bool acpi_scan_initialized; -int __init acpi_scan_init(void) +void __init acpi_scan_init(void) { - int result; acpi_status status; struct acpi_table_stao *stao_ptr; @@ -2594,8 +2593,7 @@ int __init acpi_scan_init(void) /* * Enumerate devices in the ACPI namespace. */ - result = acpi_bus_scan(ACPI_ROOT_OBJECT); - if (result) + if (acpi_bus_scan(ACPI_ROOT_OBJECT)) goto out; acpi_root = acpi_fetch_acpi_dev(ACPI_ROOT_OBJECT); @@ -2604,8 +2602,7 @@ int __init acpi_scan_init(void) /* Fixed feature devices do not exist on HW-reduced platform */ if (!acpi_gbl_reduced_hardware) { - result = acpi_bus_scan_fixed(); - if (result) { + if (acpi_bus_scan_fixed()) { acpi_detach_data(acpi_root->handle, acpi_scan_drop_device); acpi_device_del(acpi_root); @@ -2620,7 +2617,6 @@ int __init acpi_scan_init(void) out: mutex_unlock(&acpi_scan_lock); - return result; } static struct acpi_probe_entry *ape; From 681e7187aef46f8d4e0fd0ddd2f888e3e5533cb2 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Tue, 11 Jan 2022 17:52:00 +0100 Subject: [PATCH 327/493] ACPI: scan: Simplify initialization of power and sleep buttons It should be perfectly fine to use ACPI if the "fixed" power or sleep buttons cannot be initialized. Moreover, running acpi_bus_scan() successfully on ACPI_ROOT_OBJECT generally causes many devices to be enumerated and probed, possibly including the entire PCI bus, so unregistering acpi_root if the registration of the "fixed" buttons fails is rather unhelpful. For this reason, do not fail acpi_scan_init() when acpi_bus_scan_fixed() fails and turn the latter into a void function. While at it, drop the outdated and misleading comment from acpi_bus_scan_fixed(). Signed-off-by: Rafael J. Wysocki Reviewed-by: Mika Westerberg Reviewed-by: Hans de Goede --- drivers/acpi/scan.c | 58 ++++++++++++++++----------------------------- 1 file changed, 21 insertions(+), 37 deletions(-) diff --git a/drivers/acpi/scan.c b/drivers/acpi/scan.c index 7e7417f4df22..81ee565f47cd 100644 --- a/drivers/acpi/scan.c +++ b/drivers/acpi/scan.c @@ -2486,42 +2486,33 @@ int acpi_bus_register_early_device(int type) } EXPORT_SYMBOL_GPL(acpi_bus_register_early_device); -static int acpi_bus_scan_fixed(void) +static void acpi_bus_scan_fixed(void) { - int result = 0; - - /* - * Enumerate all fixed-feature devices. - */ if (!(acpi_gbl_FADT.flags & ACPI_FADT_POWER_BUTTON)) { - struct acpi_device *device = NULL; + struct acpi_device *adev = NULL; - result = acpi_add_single_object(&device, NULL, - ACPI_BUS_TYPE_POWER_BUTTON, false); - if (result) - return result; - - device->flags.match_driver = true; - result = device_attach(&device->dev); - if (result < 0) - return result; - - device_init_wakeup(&device->dev, true); + acpi_add_single_object(&adev, NULL, ACPI_BUS_TYPE_POWER_BUTTON, + false); + if (adev) { + adev->flags.match_driver = true; + if (device_attach(&adev->dev) >= 0) + device_init_wakeup(&adev->dev, true); + else + dev_dbg(&adev->dev, "No driver\n"); + } } if (!(acpi_gbl_FADT.flags & ACPI_FADT_SLEEP_BUTTON)) { - struct acpi_device *device = NULL; + struct acpi_device *adev = NULL; - result = acpi_add_single_object(&device, NULL, - ACPI_BUS_TYPE_SLEEP_BUTTON, false); - if (result) - return result; - - device->flags.match_driver = true; - result = device_attach(&device->dev); + acpi_add_single_object(&adev, NULL, ACPI_BUS_TYPE_SLEEP_BUTTON, + false); + if (adev) { + adev->flags.match_driver = true; + if (device_attach(&adev->dev) < 0) + dev_dbg(&adev->dev, "No driver\n"); + } } - - return result < 0 ? result : 0; } static void __init acpi_get_spcr_uart_addr(void) @@ -2601,15 +2592,8 @@ void __init acpi_scan_init(void) goto out; /* Fixed feature devices do not exist on HW-reduced platform */ - if (!acpi_gbl_reduced_hardware) { - if (acpi_bus_scan_fixed()) { - acpi_detach_data(acpi_root->handle, - acpi_scan_drop_device); - acpi_device_del(acpi_root); - acpi_bus_put_acpi_device(acpi_root); - goto out; - } - } + if (!acpi_gbl_reduced_hardware) + acpi_bus_scan_fixed(); acpi_turn_off_unused_power_resources(); From c96f195deeefecd8ef95c5aa508671dd246bf119 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Tue, 11 Jan 2022 17:53:29 +0100 Subject: [PATCH 328/493] ACPI: scan: Rename label in acpi_scan_init() Rename the "out" label in acpi_scan_init() to "unlock", which is a better match for its purpose, and fix up its alignment. No functional impact. Signed-off-by: Rafael J. Wysocki Reviewed-by: Mika Westerberg Reviewed-by: Hans de Goede --- drivers/acpi/scan.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/acpi/scan.c b/drivers/acpi/scan.c index 81ee565f47cd..75fab11f9188 100644 --- a/drivers/acpi/scan.c +++ b/drivers/acpi/scan.c @@ -2585,11 +2585,11 @@ void __init acpi_scan_init(void) * Enumerate devices in the ACPI namespace. */ if (acpi_bus_scan(ACPI_ROOT_OBJECT)) - goto out; + goto unlock; acpi_root = acpi_fetch_acpi_dev(ACPI_ROOT_OBJECT); if (!acpi_root) - goto out; + goto unlock; /* Fixed feature devices do not exist on HW-reduced platform */ if (!acpi_gbl_reduced_hardware) @@ -2599,7 +2599,7 @@ void __init acpi_scan_init(void) acpi_scan_initialized = true; - out: +unlock: mutex_unlock(&acpi_scan_lock); } From 62942e9fda9fd1def10ffcbd5e1c025b3c9eec17 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Wed, 12 Jan 2022 10:50:57 +0200 Subject: [PATCH 329/493] perf script: Fix hex dump character output Using grep -C with perf script -D can give erroneous results as grep loses lines due to non-printable characters, for example, below the 0020, 0060 and 0070 lines are missing: $ perf script -D | grep -C10 AUX | head . 0010: 08 00 00 00 00 00 00 00 1f 00 00 00 00 00 00 00 ................ . 0030: 01 00 00 00 00 00 00 00 00 04 00 00 00 00 00 00 ................ . 0040: 00 08 00 00 00 00 00 00 02 00 00 00 00 00 00 00 ................ . 0050: 00 00 00 00 00 00 00 00 01 00 00 00 00 00 00 00 ................ . 0080: 02 00 00 00 00 00 00 00 1b 00 00 00 00 00 00 00 ................ . 0090: 00 00 00 00 00 00 00 00 ........ 0 0 0x450 [0x98]: PERF_RECORD_AUXTRACE_INFO type: 1 PMU Type 8 Time Shift 31 perf's isprint() is a custom implementation from the kernel, but the kernel's _ctype appears to include characters from Latin-1 Supplement which is not compatible with, for example, UTF-8. Fix by checking also isascii(). After: $ tools/perf/perf script -D | grep -C10 AUX | head . 0010: 08 00 00 00 00 00 00 00 1f 00 00 00 00 00 00 00 ................ . 0020: 03 84 32 2f 00 00 00 00 63 7c 4f d2 fa ff ff ff ..2/....c|O..... . 0030: 01 00 00 00 00 00 00 00 00 04 00 00 00 00 00 00 ................ . 0040: 00 08 00 00 00 00 00 00 02 00 00 00 00 00 00 00 ................ . 0050: 00 00 00 00 00 00 00 00 01 00 00 00 00 00 00 00 ................ . 0060: 00 02 00 00 00 00 00 00 00 c0 03 00 00 00 00 00 ................ . 0070: e2 00 00 00 00 00 00 00 02 00 00 00 00 00 00 00 ................ . 0080: 02 00 00 00 00 00 00 00 1b 00 00 00 00 00 00 00 ................ . 0090: 00 00 00 00 00 00 00 00 ........ Fixes: 3052ba56bcb58904 ("tools perf: Move from sane_ctype.h obtained from git to the Linux's original") Signed-off-by: Adrian Hunter Cc: Jiri Olsa Link: http://lore.kernel.org/lkml/20220112085057.277205-1-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/debug.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/util/debug.c b/tools/perf/util/debug.c index 2c06abf6dcd2..65e6c22f38e4 100644 --- a/tools/perf/util/debug.c +++ b/tools/perf/util/debug.c @@ -179,7 +179,7 @@ static int trace_event_printer(enum binary_printer_ops op, break; case BINARY_PRINT_CHAR_DATA: printed += color_fprintf(fp, color, "%c", - isprint(ch) ? ch : '.'); + isprint(ch) && isascii(ch) ? ch : '.'); break; case BINARY_PRINT_CHAR_PAD: printed += color_fprintf(fp, color, " "); From 8de78328f041f10a2b546fdb3791a87ba6b742e6 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 12 Jan 2022 14:19:21 -0300 Subject: [PATCH 330/493] Revert "perf powerpc: Add encodings to represent data based on newer composite PERF_MEM_LVLNUM* fields" This was in a patchkit mixing up kernel with tools/ parts and I mistakenly got it merged in the perf tools tree, revert it, it'll go via the PowerPC kernel tree. This reverts commit 0ebce3d65f1f53c936fdd51e975bd876ba7ed64f. Cc: kajoljain Cc: Michael Ellerman Cc: Stephen Rothwell Link: http://lore.kernel.org/lkml/20220112171659.531d22ce@canb.auug.org.au Signed-off-by: Arnaldo Carvalho de Melo --- arch/powerpc/perf/isa207-common.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/perf/isa207-common.c b/arch/powerpc/perf/isa207-common.c index 4037ea652522..0c8b1a5cfe5c 100644 --- a/arch/powerpc/perf/isa207-common.c +++ b/arch/powerpc/perf/isa207-common.c @@ -220,13 +220,13 @@ static inline u64 isa207_find_source(u64 idx, u32 sub_idx) /* Nothing to do */ break; case 1: - ret = PH(LVL, L1) | LEVEL(L1) | P(SNOOP, HIT); + ret = PH(LVL, L1); break; case 2: - ret = PH(LVL, L2) | LEVEL(L2) | P(SNOOP, HIT); + ret = PH(LVL, L2); break; case 3: - ret = PH(LVL, L3) | LEVEL(L3) | P(SNOOP, HIT); + ret = PH(LVL, L3); break; case 4: if (cpu_has_feature(CPU_FTR_ARCH_31)) { From b4bb6f05e4b25e66825956006c3d5cbe5b73eaec Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 12 Jan 2022 14:21:45 -0300 Subject: [PATCH 331/493] Revert "perf powerpc: Add data source encodings for power10 platform" This was in a patchkit mixing up kernel with tools/ parts and I mistakenly got it merged in the perf tools tree, revert it, it'll go via the PowerPC kernel tree. This reverts commit af2b24f228a0373ac65eb7a502e0bc31e2c0269d. Cc: kajoljain Cc: Michael Ellerman Cc: Stephen Rothwell Link: http://lore.kernel.org/lkml/20220112171659.531d22ce@canb.auug.org.au Signed-off-by: Arnaldo Carvalho de Melo --- arch/powerpc/perf/isa207-common.c | 54 +++++++------------------------ 1 file changed, 12 insertions(+), 42 deletions(-) diff --git a/arch/powerpc/perf/isa207-common.c b/arch/powerpc/perf/isa207-common.c index 0c8b1a5cfe5c..7ea873ab2e6f 100644 --- a/arch/powerpc/perf/isa207-common.c +++ b/arch/powerpc/perf/isa207-common.c @@ -229,28 +229,13 @@ static inline u64 isa207_find_source(u64 idx, u32 sub_idx) ret = PH(LVL, L3); break; case 4: - if (cpu_has_feature(CPU_FTR_ARCH_31)) { - ret = P(SNOOP, HIT); - - if (sub_idx == 1) - ret |= PH(LVL, LOC_RAM) | LEVEL(RAM); - else if (sub_idx == 2 || sub_idx == 3) - ret |= P(LVL, HIT) | LEVEL(PMEM); - else if (sub_idx == 4) - ret |= PH(LVL, REM_RAM1) | REM | LEVEL(RAM) | P(HOPS, 2); - else if (sub_idx == 5 || sub_idx == 7) - ret |= P(LVL, HIT) | LEVEL(PMEM) | REM; - else if (sub_idx == 6) - ret |= PH(LVL, REM_RAM2) | REM | LEVEL(RAM) | P(HOPS, 3); - } else { - if (sub_idx <= 1) - ret = PH(LVL, LOC_RAM); - else if (sub_idx > 1 && sub_idx <= 2) - ret = PH(LVL, REM_RAM1); - else - ret = PH(LVL, REM_RAM2); - ret |= P(SNOOP, HIT); - } + if (sub_idx <= 1) + ret = PH(LVL, LOC_RAM); + else if (sub_idx > 1 && sub_idx <= 2) + ret = PH(LVL, REM_RAM1); + else + ret = PH(LVL, REM_RAM2); + ret |= P(SNOOP, HIT); break; case 5: if (cpu_has_feature(CPU_FTR_ARCH_31)) { @@ -276,26 +261,11 @@ static inline u64 isa207_find_source(u64 idx, u32 sub_idx) } break; case 6: - if (cpu_has_feature(CPU_FTR_ARCH_31)) { - if (sub_idx == 0) - ret = PH(LVL, REM_CCE1) | LEVEL(ANY_CACHE) | REM | - P(SNOOP, HIT) | P(HOPS, 2); - else if (sub_idx == 1) - ret = PH(LVL, REM_CCE1) | LEVEL(ANY_CACHE) | REM | - P(SNOOP, HITM) | P(HOPS, 2); - else if (sub_idx == 2) - ret = PH(LVL, REM_CCE2) | LEVEL(ANY_CACHE) | REM | - P(SNOOP, HIT) | P(HOPS, 3); - else if (sub_idx == 3) - ret = PH(LVL, REM_CCE2) | LEVEL(ANY_CACHE) | REM | - P(SNOOP, HITM) | P(HOPS, 3); - } else { - ret = PH(LVL, REM_CCE2); - if (sub_idx == 0 || sub_idx == 2) - ret |= P(SNOOP, HIT); - else if (sub_idx == 1 || sub_idx == 3) - ret |= P(SNOOP, HITM); - } + ret = PH(LVL, REM_CCE2); + if ((sub_idx == 0) || (sub_idx == 2)) + ret |= P(SNOOP, HIT); + else if ((sub_idx == 1) || (sub_idx == 3)) + ret |= P(SNOOP, HITM); break; case 7: ret = PM(LVL, L1); From dcffc5ebb80dd5887b91091b8ecd082c9ed75361 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Thu, 23 Dec 2021 10:39:48 -0800 Subject: [PATCH 332/493] perf evsel: Improve error message for uncore events When a group has multiple events and the leader fails it can yield errors like: $ perf stat -e '{uncore_imc/cas_count_read/},instructions' /bin/true Error: The sys_perf_event_open() syscall returned with 22 (Invalid argument) for event (uncore_imc/cas_count_read/). /bin/dmesg | grep -i perf may provide additional information. However, when not the group leader is given: $ perf stat -e '{instructions,uncore_imc/cas_count_read/}' /bin/true ... 1,619,057 instructions MiB uncore_imc/cas_count_read/ This is necessary because get_group_fd will fail if the leader fails and is the direct result of the check on line 750 of builtin-stat.c in stat_handle_error that returns COUNTER_SKIP for the latter case. This patch improves the error message to: $ perf stat -e '{uncore_imc/cas_count_read/},instructions' /bin/true Error: Invalid event (uncore_imc/cas_count_read/) in per-thread mode, enable system wide with '-a'. v2. Changed the test to use !target__has_cpu as suggested by Namhyung Kim. Signed-off-by: Ian Rogers Cc: Alexander Shishkin Cc: Andi Kleen Cc: Ingo Molnar Cc: Jiri Olsa Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Link: https://lore.kernel.org/r/20211223183948.3423989-2-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/evsel.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 656c30b988ce..a0acf53a2510 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -2931,6 +2931,10 @@ int evsel__open_strerror(struct evsel *evsel, struct target *target, return scnprintf(msg, size, "wrong clockid (%d).", clockid); if (perf_missing_features.aux_output) return scnprintf(msg, size, "The 'aux_output' feature is not supported, update the kernel."); + if (!target__has_cpu(target)) + return scnprintf(msg, size, + "Invalid event (%s) in per-thread mode, enable system wide with '-a'.", + evsel__name(evsel)); break; case ENODATA: return scnprintf(msg, size, "Cannot collect data source with the load latency event alone. " From 818ab78c03aad94fabc18d386e9c73b539a1f447 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Tue, 4 Jan 2022 22:13:04 -0800 Subject: [PATCH 333/493] libperf: Add comments to 'struct perf_cpu_map' A particular observed problem is confusing the index with the CPU value, documentation should hopefully reduce this type of problem. Reviewed-by: James Clark Reviewed-by: John Garry Signed-off-by: Ian Rogers Cc: Alexander Shishkin Cc: Andi Kleen Cc: Ingo Molnar Cc: Jiri Olsa Cc: Kajol Jain Cc: Kan Liang Cc: Leo Yan Cc: Mark Rutland Cc: Mathieu Poirier Cc: Mike Leach Cc: Namhyung Kim Cc: Paul Clarke Cc: Peter Zijlstra Cc: Riccardo Mancini Cc: Stephane Eranian Cc: Suzuki Poulouse Cc: Vineet Singh Cc: coresight@lists.linaro.org Cc: linux-arm-kernel@lists.infradead.org Cc: zhengjun.xing@intel.com Link: https://lore.kernel.org/r/20220105061351.120843-2-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/lib/perf/include/internal/cpumap.h | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/tools/lib/perf/include/internal/cpumap.h b/tools/lib/perf/include/internal/cpumap.h index 840d4032587b..4054169c12c5 100644 --- a/tools/lib/perf/include/internal/cpumap.h +++ b/tools/lib/perf/include/internal/cpumap.h @@ -4,9 +4,18 @@ #include +/** + * A sized, reference counted, sorted array of integers representing CPU + * numbers. This is commonly used to capture which CPUs a PMU is associated + * with. The indices into the cpumap are frequently used as they avoid having + * gaps if CPU numbers were used. For events associated with a pid, rather than + * a CPU, a single dummy map with an entry of -1 is used. + */ struct perf_cpu_map { refcount_t refcnt; + /** Length of the map array. */ int nr; + /** The CPU values. */ int map[]; }; From ca2c9b76bc3c75ac116ef199b75e7ca4e27e7acb Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Tue, 4 Jan 2022 22:13:05 -0800 Subject: [PATCH 334/493] perf stat: Add aggr creators that are passed a cpu The cpu_map and index can get confused. Add variants of the cpu_map__get routines that are passed a cpu. Make the existing cpu_map__get routines use the new functions with a view to remove them when no longer used. Reviewed-by: James Clark Signed-off-by: Ian Rogers Cc: Alexander Shishkin Cc: Andi Kleen Cc: Ingo Molnar Cc: Jiri Olsa Cc: John Garry Cc: Kajol Jain Cc: Kan Liang Cc: Leo Yan Cc: Mark Rutland Cc: Mathieu Poirier Cc: Mike Leach Cc: Namhyung Kim Cc: Paul Clarke Cc: Peter Zijlstra Cc: Riccardo Mancini Cc: Stephane Eranian Cc: Suzuki Poulouse Cc: Vineet Singh Cc: coresight@lists.linaro.org Cc: linux-arm-kernel@lists.infradead.org Cc: zhengjun.xing@intel.com Link: https://lore.kernel.org/r/20220105061351.120843-3-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/cpumap.c | 81 +++++++++++++++++++++++----------------- tools/perf/util/cpumap.h | 6 ++- 2 files changed, 52 insertions(+), 35 deletions(-) diff --git a/tools/perf/util/cpumap.c b/tools/perf/util/cpumap.c index 87d3eca9b872..49fba2c53822 100644 --- a/tools/perf/util/cpumap.c +++ b/tools/perf/util/cpumap.c @@ -128,21 +128,23 @@ int cpu_map__get_socket_id(int cpu) return ret ?: value; } -struct aggr_cpu_id cpu_map__get_socket(struct perf_cpu_map *map, int idx, - void *data __maybe_unused) +struct aggr_cpu_id cpu_map__get_socket_aggr_by_cpu(int cpu, void *data __maybe_unused) { - int cpu; struct aggr_cpu_id id = cpu_map__empty_aggr_cpu_id(); - if (idx > map->nr) - return id; - - cpu = map->map[idx]; - id.socket = cpu_map__get_socket_id(cpu); return id; } +struct aggr_cpu_id cpu_map__get_socket(struct perf_cpu_map *map, int idx, + void *data) +{ + if (idx < 0 || idx > map->nr) + return cpu_map__empty_aggr_cpu_id(); + + return cpu_map__get_socket_aggr_by_cpu(map->map[idx], data); +} + static int cmp_aggr_cpu_id(const void *a_pointer, const void *b_pointer) { struct aggr_cpu_id *a = (struct aggr_cpu_id *)a_pointer; @@ -200,15 +202,10 @@ int cpu_map__get_die_id(int cpu) return ret ?: value; } -struct aggr_cpu_id cpu_map__get_die(struct perf_cpu_map *map, int idx, void *data) +struct aggr_cpu_id cpu_map__get_die_aggr_by_cpu(int cpu, void *data) { - int cpu, die; - struct aggr_cpu_id id = cpu_map__empty_aggr_cpu_id(); - - if (idx > map->nr) - return id; - - cpu = map->map[idx]; + struct aggr_cpu_id id; + int die; die = cpu_map__get_die_id(cpu); /* There is no die_id on legacy system. */ @@ -220,7 +217,7 @@ struct aggr_cpu_id cpu_map__get_die(struct perf_cpu_map *map, int idx, void *dat * with the socket ID and then add die to * make a unique ID. */ - id = cpu_map__get_socket(map, idx, data); + id = cpu_map__get_socket_aggr_by_cpu(cpu, data); if (cpu_map__aggr_cpu_id_is_empty(id)) return id; @@ -228,6 +225,15 @@ struct aggr_cpu_id cpu_map__get_die(struct perf_cpu_map *map, int idx, void *dat return id; } +struct aggr_cpu_id cpu_map__get_die(struct perf_cpu_map *map, int idx, + void *data) +{ + if (idx < 0 || idx > map->nr) + return cpu_map__empty_aggr_cpu_id(); + + return cpu_map__get_die_aggr_by_cpu(map->map[idx], data); +} + int cpu_map__get_core_id(int cpu) { int value, ret = cpu__get_topology_int(cpu, "core_id", &value); @@ -239,20 +245,13 @@ int cpu_map__get_node_id(int cpu) return cpu__get_node(cpu); } -struct aggr_cpu_id cpu_map__get_core(struct perf_cpu_map *map, int idx, void *data) +struct aggr_cpu_id cpu_map__get_core_aggr_by_cpu(int cpu, void *data) { - int cpu; - struct aggr_cpu_id id = cpu_map__empty_aggr_cpu_id(); - - if (idx > map->nr) - return id; - - cpu = map->map[idx]; - - cpu = cpu_map__get_core_id(cpu); + struct aggr_cpu_id id; + int core = cpu_map__get_core_id(cpu); /* cpu_map__get_die returns a struct with socket and die set*/ - id = cpu_map__get_die(map, idx, data); + id = cpu_map__get_die_aggr_by_cpu(cpu, data); if (cpu_map__aggr_cpu_id_is_empty(id)) return id; @@ -260,21 +259,35 @@ struct aggr_cpu_id cpu_map__get_core(struct perf_cpu_map *map, int idx, void *da * core_id is relative to socket and die, we need a global id. * So we combine the result from cpu_map__get_die with the core id */ - id.core = cpu; + id.core = core; return id; + } -struct aggr_cpu_id cpu_map__get_node(struct perf_cpu_map *map, int idx, void *data __maybe_unused) +struct aggr_cpu_id cpu_map__get_core(struct perf_cpu_map *map, int idx, void *data) +{ + if (idx < 0 || idx > map->nr) + return cpu_map__empty_aggr_cpu_id(); + + return cpu_map__get_core_aggr_by_cpu(map->map[idx], data); +} + +struct aggr_cpu_id cpu_map__get_node_aggr_by_cpu(int cpu, void *data __maybe_unused) { struct aggr_cpu_id id = cpu_map__empty_aggr_cpu_id(); - if (idx < 0 || idx >= map->nr) - return id; - - id.node = cpu_map__get_node_id(map->map[idx]); + id.node = cpu_map__get_node_id(cpu); return id; } +struct aggr_cpu_id cpu_map__get_node(struct perf_cpu_map *map, int idx, void *data) +{ + if (idx < 0 || idx >= map->nr) + return cpu_map__empty_aggr_cpu_id(); + + return cpu_map__get_node_aggr_by_cpu(map->map[idx], data); +} + int cpu_map__build_socket_map(struct perf_cpu_map *cpus, struct cpu_aggr_map **sockp) { return cpu_map__build_map(cpus, sockp, cpu_map__get_socket, NULL); diff --git a/tools/perf/util/cpumap.h b/tools/perf/util/cpumap.h index a27eeaf086e8..c62d67704425 100644 --- a/tools/perf/util/cpumap.h +++ b/tools/perf/util/cpumap.h @@ -31,13 +31,17 @@ size_t cpu_map__snprint(struct perf_cpu_map *map, char *buf, size_t size); size_t cpu_map__snprint_mask(struct perf_cpu_map *map, char *buf, size_t size); size_t cpu_map__fprintf(struct perf_cpu_map *map, FILE *fp); int cpu_map__get_socket_id(int cpu); +struct aggr_cpu_id cpu_map__get_socket_aggr_by_cpu(int cpu, void *data); struct aggr_cpu_id cpu_map__get_socket(struct perf_cpu_map *map, int idx, void *data); int cpu_map__get_die_id(int cpu); +struct aggr_cpu_id cpu_map__get_die_aggr_by_cpu(int cpu, void *data); struct aggr_cpu_id cpu_map__get_die(struct perf_cpu_map *map, int idx, void *data); int cpu_map__get_core_id(int cpu); +struct aggr_cpu_id cpu_map__get_core_aggr_by_cpu(int cpu, void *data); struct aggr_cpu_id cpu_map__get_core(struct perf_cpu_map *map, int idx, void *data); int cpu_map__get_node_id(int cpu); -struct aggr_cpu_id cpu_map__get_node(struct perf_cpu_map *map, int idx, void *data); +struct aggr_cpu_id cpu_map__get_node_aggr_by_cpu(int cpu, void *data); +struct aggr_cpu_id cpu_map__get_node(struct perf_cpu_map *map, int idx, void *data); int cpu_map__build_socket_map(struct perf_cpu_map *cpus, struct cpu_aggr_map **sockp); int cpu_map__build_die_map(struct perf_cpu_map *cpus, struct cpu_aggr_map **diep); int cpu_map__build_core_map(struct perf_cpu_map *cpus, struct cpu_aggr_map **corep); From 01843ca0197783d0951a1948ebeaaed9a47ce55d Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Tue, 4 Jan 2022 22:13:06 -0800 Subject: [PATCH 335/493] perf stat: Correct aggregation CPU map Switch the perf_cpu_map in aggr_update_shadow from the evlist to the counter's cpu map, so the index is appropriate. This addresses a problem where uncore counts, with a cpumap like: $ cat /sys/devices/uncore_imc_0/cpumask 0,18 Don't aggregate counts in CPUs based on the index of those values in the cpumap (0 and 1) but on the actual CPU (0 and 18). Thereby correcting metric calculations in per-socket mode for counters without a full cpumask. On a SkylakeX with a tweaked DRAM_BW_Use metric, to remove unnecessary scaling, this gives: Before: $ /perf stat --per-socket -M DRAM_BW_Use -I 1000 1.001102293 S0 1 27.01 MiB uncore_imc/cas_count_write/ # 103.00 DRAM_BW_Use 1.001102293 S0 1 30.22 MiB uncore_imc/cas_count_read/ 1.001102293 S0 1 1,001,102,293 ns duration_time 1.001102293 S1 1 20.10 MiB uncore_imc/cas_count_write/ # 0.00 DRAM_BW_Use 1.001102293 S1 1 32.74 MiB uncore_imc/cas_count_read/ 1.001102293 S1 0 ns duration_time 2.003517973 S0 1 83.04 MiB uncore_imc/cas_count_write/ # 920.00 DRAM_BW_Use 2.003517973 S0 1 145.95 MiB uncore_imc/cas_count_read/ 2.003517973 S0 1 1,002,415,680 ns duration_time 2.003517973 S1 1 302.45 MiB uncore_imc/cas_count_write/ # 0.00 DRAM_BW_Use 2.003517973 S1 1 290.99 MiB uncore_imc/cas_count_read/ 2.003517973 S1 0 ns duration_time After: $ perf stat --per-socket -M DRAM_BW_Use -I 1000 1.001080840 S0 1 24.96 MiB uncore_imc/cas_count_write/ # 54.00 DRAM_BW_Use 1.001080840 S0 1 33.64 MiB uncore_imc/cas_count_read/ 1.001080840 S0 1 1,001,080,840 ns duration_time 1.001080840 S1 1 42.43 MiB uncore_imc/cas_count_write/ # 84.00 DRAM_BW_Use 1.001080840 S1 1 47.05 MiB uncore_imc/cas_count_read/ 1.001080840 S1 0 ns duration_time Signed-off-by: Ian Rogers Tested-by: John Garry Cc: Alexander Shishkin Cc: Andi Kleen Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: Kajol Jain Cc: Kan Liang Cc: Leo Yan Cc: Mark Rutland Cc: Mathieu Poirier Cc: Mike Leach Cc: Namhyung Kim Cc: Paul Clarke Cc: Peter Zijlstra Cc: Riccardo Mancini Cc: Stephane Eranian Cc: Suzuki Poulouse Cc: Vineet Singh Cc: coresight@lists.linaro.org Cc: linux-arm-kernel@lists.infradead.org Cc: zhengjun.xing@intel.com Link: https://lore.kernel.org/r/20220105061351.120843-4-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/stat-display.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c index 588601000f3f..b0fa81ffce61 100644 --- a/tools/perf/util/stat-display.c +++ b/tools/perf/util/stat-display.c @@ -526,7 +526,7 @@ static void aggr_update_shadow(struct perf_stat_config *config, evlist__for_each_entry(evlist, counter) { val = 0; for (cpu = 0; cpu < evsel__nr_cpus(counter); cpu++) { - s2 = config->aggr_get_id(config, evlist->core.cpus, cpu); + s2 = config->aggr_get_id(config, evsel__cpus(counter), cpu); if (!cpu_map__compare_aggr_cpu_id(s2, id)) continue; val += perf_counts(counter->counts, cpu, 0)->val; From a023283fadef8a3f6916ba2b0c37955d76ffaf4d Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Tue, 4 Jan 2022 22:13:07 -0800 Subject: [PATCH 336/493] perf stat: Switch aggregation to use for_each loop Tidy up the use of cpu and index to hopefully make the code less error prone. Avoid unused warnings with (void) which will be removed in a later patch. Reviewed-by: James Clark Signed-off-by: Ian Rogers Cc: Alexander Shishkin Cc: Andi Kleen Cc: Ingo Molnar Cc: Jiri Olsa Cc: John Garry Cc: Kajol Jain Cc: Kan Liang Cc: Leo Yan Cc: Mark Rutland Cc: Mathieu Poirier Cc: Mike Leach Cc: Namhyung Kim Cc: Paul Clarke Cc: Peter Zijlstra Cc: Riccardo Mancini Cc: Stephane Eranian Cc: Suzuki Poulouse Cc: Vineet Singh Cc: coresight@lists.linaro.org Cc: linux-arm-kernel@lists.infradead.org Cc: zhengjun.xing@intel.com Link: https://lore.kernel.org/r/20220105061351.120843-5-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/stat-display.c | 48 +++++++++++++++++++--------------- 1 file changed, 27 insertions(+), 21 deletions(-) diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c index b0fa81ffce61..efab39a759ff 100644 --- a/tools/perf/util/stat-display.c +++ b/tools/perf/util/stat-display.c @@ -330,8 +330,8 @@ static void print_metric_header(struct perf_stat_config *config, static int first_shadow_cpu(struct perf_stat_config *config, struct evsel *evsel, struct aggr_cpu_id id) { - struct evlist *evlist = evsel->evlist; - int i; + struct perf_cpu_map *cpus; + int cpu, idx; if (config->aggr_mode == AGGR_NONE) return id.core; @@ -339,14 +339,11 @@ static int first_shadow_cpu(struct perf_stat_config *config, if (!config->aggr_get_id) return 0; - for (i = 0; i < evsel__nr_cpus(evsel); i++) { - int cpu2 = evsel__cpus(evsel)->map[i]; - - if (cpu_map__compare_aggr_cpu_id( - config->aggr_get_id(config, evlist->core.cpus, cpu2), - id)) { - return cpu2; - } + cpus = evsel__cpus(evsel); + perf_cpu_map__for_each_cpu(cpu, idx, cpus) { + if (cpu_map__compare_aggr_cpu_id(config->aggr_get_id(config, cpus, idx), + id)) + return cpu; } return 0; } @@ -516,20 +513,23 @@ static void printout(struct perf_stat_config *config, struct aggr_cpu_id id, int static void aggr_update_shadow(struct perf_stat_config *config, struct evlist *evlist) { - int cpu, s; + int cpu, idx, s; struct aggr_cpu_id s2, id; u64 val; struct evsel *counter; + struct perf_cpu_map *cpus; for (s = 0; s < config->aggr_map->nr; s++) { id = config->aggr_map->map[s]; evlist__for_each_entry(evlist, counter) { + cpus = evsel__cpus(counter); val = 0; - for (cpu = 0; cpu < evsel__nr_cpus(counter); cpu++) { - s2 = config->aggr_get_id(config, evsel__cpus(counter), cpu); + perf_cpu_map__for_each_cpu(cpu, idx, cpus) { + (void)cpu; + s2 = config->aggr_get_id(config, cpus, idx); if (!cpu_map__compare_aggr_cpu_id(s2, id)) continue; - val += perf_counts(counter->counts, cpu, 0)->val; + val += perf_counts(counter->counts, idx, 0)->val; } perf_stat__update_shadow_stats(counter, val, first_shadow_cpu(config, counter, id), @@ -634,18 +634,21 @@ static void aggr_cb(struct perf_stat_config *config, struct evsel *counter, void *data, bool first) { struct aggr_data *ad = data; - int cpu; + int idx, cpu; + struct perf_cpu_map *cpus; struct aggr_cpu_id s2; - for (cpu = 0; cpu < evsel__nr_cpus(counter); cpu++) { + cpus = evsel__cpus(counter); + perf_cpu_map__for_each_cpu(cpu, idx, cpus) { struct perf_counts_values *counts; - s2 = config->aggr_get_id(config, evsel__cpus(counter), cpu); + (void)cpu; + s2 = config->aggr_get_id(config, cpus, idx); if (!cpu_map__compare_aggr_cpu_id(s2, ad->id)) continue; if (first) ad->nr++; - counts = perf_counts(counter->counts, cpu, 0); + counts = perf_counts(counter->counts, idx, 0); /* * When any result is bad, make them all to give * consistent output in interval mode. @@ -1208,10 +1211,13 @@ static void print_percore_thread(struct perf_stat_config *config, { int s; struct aggr_cpu_id s2, id; + struct perf_cpu_map *cpus; bool first = true; + int idx, cpu; - for (int i = 0; i < evsel__nr_cpus(counter); i++) { - s2 = config->aggr_get_id(config, evsel__cpus(counter), i); + cpus = evsel__cpus(counter); + perf_cpu_map__for_each_cpu(cpu, idx, cpus) { + s2 = config->aggr_get_id(config, cpus, idx); for (s = 0; s < config->aggr_map->nr; s++) { id = config->aggr_map->map[s]; if (cpu_map__compare_aggr_cpu_id(s2, id)) @@ -1220,7 +1226,7 @@ static void print_percore_thread(struct perf_stat_config *config, print_counter_aggrdata(config, counter, s, prefix, false, - &first, i); + &first, cpu); } } From 88031a0de7d68d132014154b9e5307428e8ed70d Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Tue, 4 Jan 2022 22:13:08 -0800 Subject: [PATCH 337/493] perf stat: Switch to cpu version of cpu_map__get() Avoid possible bugs where the wrong index is passed with the cpu_map. Reviewed-by: James Clark Signed-off-by: Ian Rogers Cc: Alexander Shishkin Cc: Andi Kleen Cc: Ingo Molnar Cc: Jiri Olsa Cc: John Garry Cc: Kajol Jain Cc: Kan Liang Cc: Leo Yan Cc: Mark Rutland Cc: Mathieu Poirier Cc: Mike Leach Cc: Namhyung Kim Cc: Paul Clarke Cc: Peter Zijlstra Cc: Riccardo Mancini Cc: Stephane Eranian Cc: Suzuki Poulouse Cc: Vineet Singh Cc: coresight@lists.linaro.org Cc: linux-arm-kernel@lists.infradead.org Cc: zhengjun.xing@intel.com Link: https://lore.kernel.org/r/20220105061351.120843-6-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-stat.c | 93 +++++++++++++++++++--------------- tools/perf/util/stat-display.c | 11 ++-- tools/perf/util/stat.h | 3 +- 3 files changed, 57 insertions(+), 50 deletions(-) diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index f6ca2b054c5b..9791ae9b1a53 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -1323,69 +1323,63 @@ static struct option stat_options[] = { }; static struct aggr_cpu_id perf_stat__get_socket(struct perf_stat_config *config __maybe_unused, - struct perf_cpu_map *map, int cpu) + int cpu) { - return cpu_map__get_socket(map, cpu, NULL); + return cpu_map__get_socket_aggr_by_cpu(cpu, /*data=*/NULL); } static struct aggr_cpu_id perf_stat__get_die(struct perf_stat_config *config __maybe_unused, - struct perf_cpu_map *map, int cpu) + int cpu) { - return cpu_map__get_die(map, cpu, NULL); + return cpu_map__get_die_aggr_by_cpu(cpu, /*data=*/NULL); } static struct aggr_cpu_id perf_stat__get_core(struct perf_stat_config *config __maybe_unused, - struct perf_cpu_map *map, int cpu) + int cpu) { - return cpu_map__get_core(map, cpu, NULL); + return cpu_map__get_core_aggr_by_cpu(cpu, /*data=*/NULL); } static struct aggr_cpu_id perf_stat__get_node(struct perf_stat_config *config __maybe_unused, - struct perf_cpu_map *map, int cpu) + int cpu) { - return cpu_map__get_node(map, cpu, NULL); + return cpu_map__get_node_aggr_by_cpu(cpu, /*data=*/NULL); } static struct aggr_cpu_id perf_stat__get_aggr(struct perf_stat_config *config, - aggr_get_id_t get_id, struct perf_cpu_map *map, int idx) + aggr_get_id_t get_id, int cpu) { - int cpu; struct aggr_cpu_id id = cpu_map__empty_aggr_cpu_id(); - if (idx >= map->nr) - return id; - - cpu = map->map[idx]; - if (cpu_map__aggr_cpu_id_is_empty(config->cpus_aggr_map->map[cpu])) - config->cpus_aggr_map->map[cpu] = get_id(config, map, idx); + config->cpus_aggr_map->map[cpu] = get_id(config, cpu); id = config->cpus_aggr_map->map[cpu]; return id; } static struct aggr_cpu_id perf_stat__get_socket_cached(struct perf_stat_config *config, - struct perf_cpu_map *map, int idx) + int cpu) { - return perf_stat__get_aggr(config, perf_stat__get_socket, map, idx); + return perf_stat__get_aggr(config, perf_stat__get_socket, cpu); } static struct aggr_cpu_id perf_stat__get_die_cached(struct perf_stat_config *config, - struct perf_cpu_map *map, int idx) + int cpu) { - return perf_stat__get_aggr(config, perf_stat__get_die, map, idx); + return perf_stat__get_aggr(config, perf_stat__get_die, cpu); } static struct aggr_cpu_id perf_stat__get_core_cached(struct perf_stat_config *config, - struct perf_cpu_map *map, int idx) + int cpu) { - return perf_stat__get_aggr(config, perf_stat__get_core, map, idx); + return perf_stat__get_aggr(config, perf_stat__get_core, cpu); } static struct aggr_cpu_id perf_stat__get_node_cached(struct perf_stat_config *config, - struct perf_cpu_map *map, int idx) + int cpu) { - return perf_stat__get_aggr(config, perf_stat__get_node, map, idx); + return perf_stat__get_aggr(config, perf_stat__get_node, cpu); } static bool term_percore_set(void) @@ -1483,8 +1477,9 @@ static void perf_stat__exit_aggr_mode(void) stat_config.cpus_aggr_map = NULL; } -static inline int perf_env__get_cpu(struct perf_env *env, struct perf_cpu_map *map, int idx) +static inline int perf_env__get_cpu(void *data, struct perf_cpu_map *map, int idx) { + struct perf_env *env = data; int cpu; if (idx > map->nr) @@ -1498,10 +1493,9 @@ static inline int perf_env__get_cpu(struct perf_env *env, struct perf_cpu_map *m return cpu; } -static struct aggr_cpu_id perf_env__get_socket(struct perf_cpu_map *map, int idx, void *data) +static struct aggr_cpu_id perf_env__get_socket_aggr_by_cpu(int cpu, void *data) { struct perf_env *env = data; - int cpu = perf_env__get_cpu(env, map, idx); struct aggr_cpu_id id = cpu_map__empty_aggr_cpu_id(); if (cpu != -1) @@ -1510,11 +1504,15 @@ static struct aggr_cpu_id perf_env__get_socket(struct perf_cpu_map *map, int idx return id; } -static struct aggr_cpu_id perf_env__get_die(struct perf_cpu_map *map, int idx, void *data) +static struct aggr_cpu_id perf_env__get_socket(struct perf_cpu_map *map, int idx, void *data) +{ + return perf_env__get_socket_aggr_by_cpu(perf_env__get_cpu(data, map, idx), data); +} + +static struct aggr_cpu_id perf_env__get_die_aggr_by_cpu(int cpu, void *data) { struct perf_env *env = data; struct aggr_cpu_id id = cpu_map__empty_aggr_cpu_id(); - int cpu = perf_env__get_cpu(env, map, idx); if (cpu != -1) { /* @@ -1529,11 +1527,15 @@ static struct aggr_cpu_id perf_env__get_die(struct perf_cpu_map *map, int idx, v return id; } -static struct aggr_cpu_id perf_env__get_core(struct perf_cpu_map *map, int idx, void *data) +static struct aggr_cpu_id perf_env__get_die(struct perf_cpu_map *map, int idx, void *data) +{ + return perf_env__get_die_aggr_by_cpu(perf_env__get_cpu(data, map, idx), data); +} + +static struct aggr_cpu_id perf_env__get_core_aggr_by_cpu(int cpu, void *data) { struct perf_env *env = data; struct aggr_cpu_id id = cpu_map__empty_aggr_cpu_id(); - int cpu = perf_env__get_cpu(env, map, idx); if (cpu != -1) { /* @@ -1549,15 +1551,24 @@ static struct aggr_cpu_id perf_env__get_core(struct perf_cpu_map *map, int idx, return id; } -static struct aggr_cpu_id perf_env__get_node(struct perf_cpu_map *map, int idx, void *data) +static struct aggr_cpu_id perf_env__get_core(struct perf_cpu_map *map, int idx, void *data) +{ + return perf_env__get_core_aggr_by_cpu(perf_env__get_cpu(data, map, idx), data); +} + +static struct aggr_cpu_id perf_env__get_node_aggr_by_cpu(int cpu, void *data) { - int cpu = perf_env__get_cpu(data, map, idx); struct aggr_cpu_id id = cpu_map__empty_aggr_cpu_id(); id.node = perf_env__numa_node(data, cpu); return id; } +static struct aggr_cpu_id perf_env__get_node(struct perf_cpu_map *map, int idx, void *data) +{ + return perf_env__get_node_aggr_by_cpu(perf_env__get_cpu(data, map, idx), data); +} + static int perf_env__build_socket_map(struct perf_env *env, struct perf_cpu_map *cpus, struct cpu_aggr_map **sockp) { @@ -1583,26 +1594,26 @@ static int perf_env__build_node_map(struct perf_env *env, struct perf_cpu_map *c } static struct aggr_cpu_id perf_stat__get_socket_file(struct perf_stat_config *config __maybe_unused, - struct perf_cpu_map *map, int idx) + int cpu) { - return perf_env__get_socket(map, idx, &perf_stat.session->header.env); + return perf_env__get_socket_aggr_by_cpu(cpu, &perf_stat.session->header.env); } static struct aggr_cpu_id perf_stat__get_die_file(struct perf_stat_config *config __maybe_unused, - struct perf_cpu_map *map, int idx) + int cpu) { - return perf_env__get_die(map, idx, &perf_stat.session->header.env); + return perf_env__get_die_aggr_by_cpu(cpu, &perf_stat.session->header.env); } static struct aggr_cpu_id perf_stat__get_core_file(struct perf_stat_config *config __maybe_unused, - struct perf_cpu_map *map, int idx) + int cpu) { - return perf_env__get_core(map, idx, &perf_stat.session->header.env); + return perf_env__get_core_aggr_by_cpu(cpu, &perf_stat.session->header.env); } static struct aggr_cpu_id perf_stat__get_node_file(struct perf_stat_config *config __maybe_unused, - struct perf_cpu_map *map, int idx) + int cpu) { - return perf_env__get_node(map, idx, &perf_stat.session->header.env); + return perf_env__get_node_aggr_by_cpu(cpu, &perf_stat.session->header.env); } static int perf_stat_init_aggr_mode_file(struct perf_stat *st) diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c index efab39a759ff..6c40b91d5e32 100644 --- a/tools/perf/util/stat-display.c +++ b/tools/perf/util/stat-display.c @@ -341,8 +341,7 @@ static int first_shadow_cpu(struct perf_stat_config *config, cpus = evsel__cpus(evsel); perf_cpu_map__for_each_cpu(cpu, idx, cpus) { - if (cpu_map__compare_aggr_cpu_id(config->aggr_get_id(config, cpus, idx), - id)) + if (cpu_map__compare_aggr_cpu_id(config->aggr_get_id(config, cpu), id)) return cpu; } return 0; @@ -525,8 +524,7 @@ static void aggr_update_shadow(struct perf_stat_config *config, cpus = evsel__cpus(counter); val = 0; perf_cpu_map__for_each_cpu(cpu, idx, cpus) { - (void)cpu; - s2 = config->aggr_get_id(config, cpus, idx); + s2 = config->aggr_get_id(config, cpu); if (!cpu_map__compare_aggr_cpu_id(s2, id)) continue; val += perf_counts(counter->counts, idx, 0)->val; @@ -642,8 +640,7 @@ static void aggr_cb(struct perf_stat_config *config, perf_cpu_map__for_each_cpu(cpu, idx, cpus) { struct perf_counts_values *counts; - (void)cpu; - s2 = config->aggr_get_id(config, cpus, idx); + s2 = config->aggr_get_id(config, cpu); if (!cpu_map__compare_aggr_cpu_id(s2, ad->id)) continue; if (first) @@ -1217,7 +1214,7 @@ static void print_percore_thread(struct perf_stat_config *config, cpus = evsel__cpus(counter); perf_cpu_map__for_each_cpu(cpu, idx, cpus) { - s2 = config->aggr_get_id(config, cpus, idx); + s2 = config->aggr_get_id(config, cpu); for (s = 0; s < config->aggr_map->nr; s++) { id = config->aggr_map->map[s]; if (cpu_map__compare_aggr_cpu_id(s2, id)) diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h index 32c8527de347..32cf24186229 100644 --- a/tools/perf/util/stat.h +++ b/tools/perf/util/stat.h @@ -108,8 +108,7 @@ struct runtime_stat { struct rblist value_list; }; -typedef struct aggr_cpu_id (*aggr_get_id_t)(struct perf_stat_config *config, - struct perf_cpu_map *m, int cpu); +typedef struct aggr_cpu_id (*aggr_get_id_t)(struct perf_stat_config *config, int cpu); struct perf_stat_config { enum aggr_mode aggr_mode; From eff54c24bb147afc0a1423b49bfa1b8eaa85a88f Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Tue, 4 Jan 2022 22:13:09 -0800 Subject: [PATCH 338/493] perf cpumap: Switch cpu_map__build_map() to cpu function Avoid error prone cpu_map + idx variant. Remove now unused functions. Committer notes: Remove by now unused perf_env__get_cpu(). Reviewed-by: James Clark Signed-off-by: Ian Rogers Cc: Alexander Shishkin Cc: Andi Kleen Cc: Ingo Molnar Cc: Jiri Olsa Cc: John Garry Cc: Kajol Jain Cc: Kan Liang Cc: Leo Yan Cc: Mark Rutland Cc: Mathieu Poirier Cc: Mike Leach Cc: Namhyung Kim Cc: Paul Clarke Cc: Peter Zijlstra Cc: Riccardo Mancini Cc: Stephane Eranian Cc: Suzuki Poulouse Cc: Vineet Singh Cc: coresight@lists.linaro.org Cc: linux-arm-kernel@lists.infradead.org Cc: zhengjun.xing@intel.com Link: https://lore.kernel.org/r/20220105061351.120843-7-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-stat.c | 44 ++++----------------------------------- tools/perf/util/cpumap.c | 12 +++++------ tools/perf/util/cpumap.h | 2 +- 3 files changed, 11 insertions(+), 47 deletions(-) diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 9791ae9b1a53..40cb3518f27e 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -1477,22 +1477,6 @@ static void perf_stat__exit_aggr_mode(void) stat_config.cpus_aggr_map = NULL; } -static inline int perf_env__get_cpu(void *data, struct perf_cpu_map *map, int idx) -{ - struct perf_env *env = data; - int cpu; - - if (idx > map->nr) - return -1; - - cpu = map->map[idx]; - - if (cpu >= env->nr_cpus_avail) - return -1; - - return cpu; -} - static struct aggr_cpu_id perf_env__get_socket_aggr_by_cpu(int cpu, void *data) { struct perf_env *env = data; @@ -1504,11 +1488,6 @@ static struct aggr_cpu_id perf_env__get_socket_aggr_by_cpu(int cpu, void *data) return id; } -static struct aggr_cpu_id perf_env__get_socket(struct perf_cpu_map *map, int idx, void *data) -{ - return perf_env__get_socket_aggr_by_cpu(perf_env__get_cpu(data, map, idx), data); -} - static struct aggr_cpu_id perf_env__get_die_aggr_by_cpu(int cpu, void *data) { struct perf_env *env = data; @@ -1527,11 +1506,6 @@ static struct aggr_cpu_id perf_env__get_die_aggr_by_cpu(int cpu, void *data) return id; } -static struct aggr_cpu_id perf_env__get_die(struct perf_cpu_map *map, int idx, void *data) -{ - return perf_env__get_die_aggr_by_cpu(perf_env__get_cpu(data, map, idx), data); -} - static struct aggr_cpu_id perf_env__get_core_aggr_by_cpu(int cpu, void *data) { struct perf_env *env = data; @@ -1551,11 +1525,6 @@ static struct aggr_cpu_id perf_env__get_core_aggr_by_cpu(int cpu, void *data) return id; } -static struct aggr_cpu_id perf_env__get_core(struct perf_cpu_map *map, int idx, void *data) -{ - return perf_env__get_core_aggr_by_cpu(perf_env__get_cpu(data, map, idx), data); -} - static struct aggr_cpu_id perf_env__get_node_aggr_by_cpu(int cpu, void *data) { struct aggr_cpu_id id = cpu_map__empty_aggr_cpu_id(); @@ -1564,33 +1533,28 @@ static struct aggr_cpu_id perf_env__get_node_aggr_by_cpu(int cpu, void *data) return id; } -static struct aggr_cpu_id perf_env__get_node(struct perf_cpu_map *map, int idx, void *data) -{ - return perf_env__get_node_aggr_by_cpu(perf_env__get_cpu(data, map, idx), data); -} - static int perf_env__build_socket_map(struct perf_env *env, struct perf_cpu_map *cpus, struct cpu_aggr_map **sockp) { - return cpu_map__build_map(cpus, sockp, perf_env__get_socket, env); + return cpu_map__build_map(cpus, sockp, perf_env__get_socket_aggr_by_cpu, env); } static int perf_env__build_die_map(struct perf_env *env, struct perf_cpu_map *cpus, struct cpu_aggr_map **diep) { - return cpu_map__build_map(cpus, diep, perf_env__get_die, env); + return cpu_map__build_map(cpus, diep, perf_env__get_die_aggr_by_cpu, env); } static int perf_env__build_core_map(struct perf_env *env, struct perf_cpu_map *cpus, struct cpu_aggr_map **corep) { - return cpu_map__build_map(cpus, corep, perf_env__get_core, env); + return cpu_map__build_map(cpus, corep, perf_env__get_core_aggr_by_cpu, env); } static int perf_env__build_node_map(struct perf_env *env, struct perf_cpu_map *cpus, struct cpu_aggr_map **nodep) { - return cpu_map__build_map(cpus, nodep, perf_env__get_node, env); + return cpu_map__build_map(cpus, nodep, perf_env__get_node_aggr_by_cpu, env); } static struct aggr_cpu_id perf_stat__get_socket_file(struct perf_stat_config *config __maybe_unused, diff --git a/tools/perf/util/cpumap.c b/tools/perf/util/cpumap.c index 49fba2c53822..feaf34b25efc 100644 --- a/tools/perf/util/cpumap.c +++ b/tools/perf/util/cpumap.c @@ -163,7 +163,7 @@ static int cmp_aggr_cpu_id(const void *a_pointer, const void *b_pointer) } int cpu_map__build_map(struct perf_cpu_map *cpus, struct cpu_aggr_map **res, - struct aggr_cpu_id (*f)(struct perf_cpu_map *map, int cpu, void *data), + struct aggr_cpu_id (*f)(int cpu, void *data), void *data) { int nr = cpus->nr; @@ -178,7 +178,7 @@ int cpu_map__build_map(struct perf_cpu_map *cpus, struct cpu_aggr_map **res, c->nr = 0; for (cpu = 0; cpu < nr; cpu++) { - s1 = f(cpus, cpu, data); + s1 = f(cpu, data); for (s2 = 0; s2 < c->nr; s2++) { if (cpu_map__compare_aggr_cpu_id(s1, c->map[s2])) break; @@ -290,22 +290,22 @@ struct aggr_cpu_id cpu_map__get_node(struct perf_cpu_map *map, int idx, void *da int cpu_map__build_socket_map(struct perf_cpu_map *cpus, struct cpu_aggr_map **sockp) { - return cpu_map__build_map(cpus, sockp, cpu_map__get_socket, NULL); + return cpu_map__build_map(cpus, sockp, cpu_map__get_socket_aggr_by_cpu, NULL); } int cpu_map__build_die_map(struct perf_cpu_map *cpus, struct cpu_aggr_map **diep) { - return cpu_map__build_map(cpus, diep, cpu_map__get_die, NULL); + return cpu_map__build_map(cpus, diep, cpu_map__get_die_aggr_by_cpu, NULL); } int cpu_map__build_core_map(struct perf_cpu_map *cpus, struct cpu_aggr_map **corep) { - return cpu_map__build_map(cpus, corep, cpu_map__get_core, NULL); + return cpu_map__build_map(cpus, corep, cpu_map__get_core_aggr_by_cpu, NULL); } int cpu_map__build_node_map(struct perf_cpu_map *cpus, struct cpu_aggr_map **numap) { - return cpu_map__build_map(cpus, numap, cpu_map__get_node, NULL); + return cpu_map__build_map(cpus, numap, cpu_map__get_node_aggr_by_cpu, NULL); } /* setup simple routines to easily access node numbers given a cpu number */ diff --git a/tools/perf/util/cpumap.h b/tools/perf/util/cpumap.h index c62d67704425..9648816c4255 100644 --- a/tools/perf/util/cpumap.h +++ b/tools/perf/util/cpumap.h @@ -63,7 +63,7 @@ int cpu__max_present_cpu(void); int cpu__get_node(int cpu); int cpu_map__build_map(struct perf_cpu_map *cpus, struct cpu_aggr_map **res, - struct aggr_cpu_id (*f)(struct perf_cpu_map *map, int cpu, void *data), + struct aggr_cpu_id (*f)(int cpu, void *data), void *data); int cpu_map__cpu(struct perf_cpu_map *cpus, int idx); From 448a69d9f34d02920cffba741ca0a2e34a5bb316 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Tue, 4 Jan 2022 22:13:10 -0800 Subject: [PATCH 339/493] perf cpumap: Remove map+index get_socket() Migrate final users to appropriate cpu variant. Reviewed-by: James Clark Signed-off-by: Ian Rogers Cc: Alexander Shishkin Cc: Andi Kleen Cc: Ingo Molnar Cc: Jiri Olsa Cc: John Garry Cc: Kajol Jain Cc: Kan Liang Cc: Leo Yan Cc: Mark Rutland Cc: Mathieu Poirier Cc: Mike Leach Cc: Namhyung Kim Cc: Paul Clarke Cc: Peter Zijlstra Cc: Riccardo Mancini Cc: Stephane Eranian Cc: Suzuki Poulouse Cc: Vineet Singh Cc: coresight@lists.linaro.org Cc: linux-arm-kernel@lists.infradead.org Cc: zhengjun.xing@intel.com Link: https://lore.kernel.org/r/20220105061351.120843-8-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/topology.c | 2 +- tools/perf/util/cpumap.c | 9 --------- tools/perf/util/cpumap.h | 1 - tools/perf/util/stat.c | 2 +- 4 files changed, 2 insertions(+), 12 deletions(-) diff --git a/tools/perf/tests/topology.c b/tools/perf/tests/topology.c index 869986139146..69a64074b897 100644 --- a/tools/perf/tests/topology.c +++ b/tools/perf/tests/topology.c @@ -150,7 +150,7 @@ static int check_cpu_topology(char *path, struct perf_cpu_map *map) // Test that socket ID contains only socket for (i = 0; i < map->nr; i++) { - id = cpu_map__get_socket(map, i, NULL); + id = cpu_map__get_socket_aggr_by_cpu(perf_cpu_map__cpu(map, i), NULL); TEST_ASSERT_VAL("Socket map - Socket ID doesn't match", session->header.env.cpu[map->map[i]].socket_id == id.socket); diff --git a/tools/perf/util/cpumap.c b/tools/perf/util/cpumap.c index feaf34b25efc..342a5eaee9d3 100644 --- a/tools/perf/util/cpumap.c +++ b/tools/perf/util/cpumap.c @@ -136,15 +136,6 @@ struct aggr_cpu_id cpu_map__get_socket_aggr_by_cpu(int cpu, void *data __maybe_u return id; } -struct aggr_cpu_id cpu_map__get_socket(struct perf_cpu_map *map, int idx, - void *data) -{ - if (idx < 0 || idx > map->nr) - return cpu_map__empty_aggr_cpu_id(); - - return cpu_map__get_socket_aggr_by_cpu(map->map[idx], data); -} - static int cmp_aggr_cpu_id(const void *a_pointer, const void *b_pointer) { struct aggr_cpu_id *a = (struct aggr_cpu_id *)a_pointer; diff --git a/tools/perf/util/cpumap.h b/tools/perf/util/cpumap.h index 9648816c4255..a53af24301d2 100644 --- a/tools/perf/util/cpumap.h +++ b/tools/perf/util/cpumap.h @@ -32,7 +32,6 @@ size_t cpu_map__snprint_mask(struct perf_cpu_map *map, char *buf, size_t size); size_t cpu_map__fprintf(struct perf_cpu_map *map, FILE *fp); int cpu_map__get_socket_id(int cpu); struct aggr_cpu_id cpu_map__get_socket_aggr_by_cpu(int cpu, void *data); -struct aggr_cpu_id cpu_map__get_socket(struct perf_cpu_map *map, int idx, void *data); int cpu_map__get_die_id(int cpu); struct aggr_cpu_id cpu_map__get_die_aggr_by_cpu(int cpu, void *data); struct aggr_cpu_id cpu_map__get_die(struct perf_cpu_map *map, int idx, void *data); diff --git a/tools/perf/util/stat.c b/tools/perf/util/stat.c index 09ea334586f2..9eca1111fa52 100644 --- a/tools/perf/util/stat.c +++ b/tools/perf/util/stat.c @@ -328,7 +328,7 @@ static int check_per_pkg(struct evsel *counter, if (!(vals->run && vals->ena)) return 0; - s = cpu_map__get_socket(cpus, cpu, NULL).socket; + s = cpu_map__get_socket_id(cpu); if (s < 0) return -1; From 1cdae3d6734779a637bc4e6ec24e7f615b4e71be Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Tue, 4 Jan 2022 22:13:11 -0800 Subject: [PATCH 340/493] perf cpumap: Remove map+index get_die() Migrate final users to appropriate cpu variant. Reviewed-by: James Clark Signed-off-by: Ian Rogers Cc: Alexander Shishkin Cc: Andi Kleen Cc: Ingo Molnar Cc: Jiri Olsa Cc: John Garry Cc: Kajol Jain Cc: Kan Liang Cc: Leo Yan Cc: Mark Rutland Cc: Mathieu Poirier Cc: Mike Leach Cc: Namhyung Kim Cc: Paul Clarke Cc: Peter Zijlstra Cc: Riccardo Mancini Cc: Stephane Eranian Cc: Suzuki Poulouse Cc: Vineet Singh Cc: coresight@lists.linaro.org Cc: linux-arm-kernel@lists.infradead.org Cc: zhengjun.xing@intel.com Link: https://lore.kernel.org/r/20220105061351.120843-9-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/topology.c | 2 +- tools/perf/util/cpumap.c | 9 --------- tools/perf/util/cpumap.h | 1 - tools/perf/util/stat.c | 2 +- 4 files changed, 2 insertions(+), 12 deletions(-) diff --git a/tools/perf/tests/topology.c b/tools/perf/tests/topology.c index 69a64074b897..ce085b6f379b 100644 --- a/tools/perf/tests/topology.c +++ b/tools/perf/tests/topology.c @@ -136,7 +136,7 @@ static int check_cpu_topology(char *path, struct perf_cpu_map *map) // Test that die ID contains socket and die for (i = 0; i < map->nr; i++) { - id = cpu_map__get_die(map, i, NULL); + id = cpu_map__get_die_aggr_by_cpu(perf_cpu_map__cpu(map, i), NULL); TEST_ASSERT_VAL("Die map - Socket ID doesn't match", session->header.env.cpu[map->map[i]].socket_id == id.socket); diff --git a/tools/perf/util/cpumap.c b/tools/perf/util/cpumap.c index 342a5eaee9d3..ff91c32da688 100644 --- a/tools/perf/util/cpumap.c +++ b/tools/perf/util/cpumap.c @@ -216,15 +216,6 @@ struct aggr_cpu_id cpu_map__get_die_aggr_by_cpu(int cpu, void *data) return id; } -struct aggr_cpu_id cpu_map__get_die(struct perf_cpu_map *map, int idx, - void *data) -{ - if (idx < 0 || idx > map->nr) - return cpu_map__empty_aggr_cpu_id(); - - return cpu_map__get_die_aggr_by_cpu(map->map[idx], data); -} - int cpu_map__get_core_id(int cpu) { int value, ret = cpu__get_topology_int(cpu, "core_id", &value); diff --git a/tools/perf/util/cpumap.h b/tools/perf/util/cpumap.h index a53af24301d2..365ed69699e1 100644 --- a/tools/perf/util/cpumap.h +++ b/tools/perf/util/cpumap.h @@ -34,7 +34,6 @@ int cpu_map__get_socket_id(int cpu); struct aggr_cpu_id cpu_map__get_socket_aggr_by_cpu(int cpu, void *data); int cpu_map__get_die_id(int cpu); struct aggr_cpu_id cpu_map__get_die_aggr_by_cpu(int cpu, void *data); -struct aggr_cpu_id cpu_map__get_die(struct perf_cpu_map *map, int idx, void *data); int cpu_map__get_core_id(int cpu); struct aggr_cpu_id cpu_map__get_core_aggr_by_cpu(int cpu, void *data); struct aggr_cpu_id cpu_map__get_core(struct perf_cpu_map *map, int idx, void *data); diff --git a/tools/perf/util/stat.c b/tools/perf/util/stat.c index 9eca1111fa52..5ed99bcfe91e 100644 --- a/tools/perf/util/stat.c +++ b/tools/perf/util/stat.c @@ -336,7 +336,7 @@ static int check_per_pkg(struct evsel *counter, * On multi-die system, die_id > 0. On no-die system, die_id = 0. * We use hashmap(socket, die) to check the used socket+die pair. */ - d = cpu_map__get_die(cpus, cpu, NULL).die; + d = cpu_map__get_die_id(cpu); if (d < 0) return -1; From 3f6233dc7798044637426ae1099d88aa375c467f Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Tue, 4 Jan 2022 22:13:12 -0800 Subject: [PATCH 341/493] perf cpumap: Remove map+index get_core() Migrate final users to appropriate cpu variant. Reviewed-by: James Clark Signed-off-by: Ian Rogers Cc: Alexander Shishkin Cc: Andi Kleen Cc: Ingo Molnar Cc: Jiri Olsa Cc: John Garry Cc: Kajol Jain Cc: Kan Liang Cc: Leo Yan Cc: Mark Rutland Cc: Mathieu Poirier Cc: Mike Leach Cc: Namhyung Kim Cc: Paul Clarke Cc: Peter Zijlstra Cc: Riccardo Mancini Cc: Stephane Eranian Cc: Suzuki Poulouse Cc: Vineet Singh Cc: coresight@lists.linaro.org Cc: linux-arm-kernel@lists.infradead.org Cc: zhengjun.xing@intel.com Link: https://lore.kernel.org/r/20220105061351.120843-10-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/topology.c | 2 +- tools/perf/util/cpumap.c | 8 -------- tools/perf/util/cpumap.h | 1 - 3 files changed, 1 insertion(+), 10 deletions(-) diff --git a/tools/perf/tests/topology.c b/tools/perf/tests/topology.c index ce085b6f379b..9a671670415a 100644 --- a/tools/perf/tests/topology.c +++ b/tools/perf/tests/topology.c @@ -121,7 +121,7 @@ static int check_cpu_topology(char *path, struct perf_cpu_map *map) // Test that core ID contains socket, die and core for (i = 0; i < map->nr; i++) { - id = cpu_map__get_core(map, i, NULL); + id = cpu_map__get_core_aggr_by_cpu(perf_cpu_map__cpu(map, i), NULL); TEST_ASSERT_VAL("Core map - Core ID doesn't match", session->header.env.cpu[map->map[i]].core_id == id.core); diff --git a/tools/perf/util/cpumap.c b/tools/perf/util/cpumap.c index ff91c32da688..e8149bcf8bfa 100644 --- a/tools/perf/util/cpumap.c +++ b/tools/perf/util/cpumap.c @@ -246,14 +246,6 @@ struct aggr_cpu_id cpu_map__get_core_aggr_by_cpu(int cpu, void *data) } -struct aggr_cpu_id cpu_map__get_core(struct perf_cpu_map *map, int idx, void *data) -{ - if (idx < 0 || idx > map->nr) - return cpu_map__empty_aggr_cpu_id(); - - return cpu_map__get_core_aggr_by_cpu(map->map[idx], data); -} - struct aggr_cpu_id cpu_map__get_node_aggr_by_cpu(int cpu, void *data __maybe_unused) { struct aggr_cpu_id id = cpu_map__empty_aggr_cpu_id(); diff --git a/tools/perf/util/cpumap.h b/tools/perf/util/cpumap.h index 365ed69699e1..7e1829468bd6 100644 --- a/tools/perf/util/cpumap.h +++ b/tools/perf/util/cpumap.h @@ -36,7 +36,6 @@ int cpu_map__get_die_id(int cpu); struct aggr_cpu_id cpu_map__get_die_aggr_by_cpu(int cpu, void *data); int cpu_map__get_core_id(int cpu); struct aggr_cpu_id cpu_map__get_core_aggr_by_cpu(int cpu, void *data); -struct aggr_cpu_id cpu_map__get_core(struct perf_cpu_map *map, int idx, void *data); int cpu_map__get_node_id(int cpu); struct aggr_cpu_id cpu_map__get_node_aggr_by_cpu(int cpu, void *data); struct aggr_cpu_id cpu_map__get_node(struct perf_cpu_map *map, int idx, void *data); From 86d94048e234c94af88a528ab4d5ef16e8a89f8a Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Tue, 4 Jan 2022 22:13:13 -0800 Subject: [PATCH 342/493] perf cpumap: Remove map+index get_node() Migrate final users to appropriate cpu variant. Reviewed-by: James Clark Signed-off-by: Ian Rogers Cc: Alexander Shishkin Cc: Andi Kleen Cc: Ingo Molnar Cc: Jiri Olsa Cc: John Garry Cc: Kajol Jain Cc: Kan Liang Cc: Leo Yan Cc: Mark Rutland Cc: Mathieu Poirier Cc: Mike Leach Cc: Namhyung Kim Cc: Paul Clarke Cc: Peter Zijlstra Cc: Riccardo Mancini Cc: Stephane Eranian Cc: Suzuki Poulouse Cc: Vineet Singh Cc: coresight@lists.linaro.org Cc: linux-arm-kernel@lists.infradead.org Cc: zhengjun.xing@intel.com Link: https://lore.kernel.org/r/20220105061351.120843-11-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/topology.c | 2 +- tools/perf/util/cpumap.c | 8 -------- tools/perf/util/cpumap.h | 1 - 3 files changed, 1 insertion(+), 10 deletions(-) diff --git a/tools/perf/tests/topology.c b/tools/perf/tests/topology.c index 9a671670415a..5992b323c4f5 100644 --- a/tools/perf/tests/topology.c +++ b/tools/perf/tests/topology.c @@ -162,7 +162,7 @@ static int check_cpu_topology(char *path, struct perf_cpu_map *map) // Test that node ID contains only node for (i = 0; i < map->nr; i++) { - id = cpu_map__get_node(map, i, NULL); + id = cpu_map__get_node_aggr_by_cpu(perf_cpu_map__cpu(map, i), NULL); TEST_ASSERT_VAL("Node map - Node ID doesn't match", cpu__get_node(map->map[i]) == id.node); TEST_ASSERT_VAL("Node map - Socket is set", id.socket == -1); diff --git a/tools/perf/util/cpumap.c b/tools/perf/util/cpumap.c index e8149bcf8bfa..f67b2e7aac13 100644 --- a/tools/perf/util/cpumap.c +++ b/tools/perf/util/cpumap.c @@ -254,14 +254,6 @@ struct aggr_cpu_id cpu_map__get_node_aggr_by_cpu(int cpu, void *data __maybe_unu return id; } -struct aggr_cpu_id cpu_map__get_node(struct perf_cpu_map *map, int idx, void *data) -{ - if (idx < 0 || idx >= map->nr) - return cpu_map__empty_aggr_cpu_id(); - - return cpu_map__get_node_aggr_by_cpu(map->map[idx], data); -} - int cpu_map__build_socket_map(struct perf_cpu_map *cpus, struct cpu_aggr_map **sockp) { return cpu_map__build_map(cpus, sockp, cpu_map__get_socket_aggr_by_cpu, NULL); diff --git a/tools/perf/util/cpumap.h b/tools/perf/util/cpumap.h index 7e1829468bd6..f0121dd4fdcb 100644 --- a/tools/perf/util/cpumap.h +++ b/tools/perf/util/cpumap.h @@ -38,7 +38,6 @@ int cpu_map__get_core_id(int cpu); struct aggr_cpu_id cpu_map__get_core_aggr_by_cpu(int cpu, void *data); int cpu_map__get_node_id(int cpu); struct aggr_cpu_id cpu_map__get_node_aggr_by_cpu(int cpu, void *data); -struct aggr_cpu_id cpu_map__get_node(struct perf_cpu_map *map, int idx, void *data); int cpu_map__build_socket_map(struct perf_cpu_map *cpus, struct cpu_aggr_map **sockp); int cpu_map__build_die_map(struct perf_cpu_map *cpus, struct cpu_aggr_map **diep); int cpu_map__build_core_map(struct perf_cpu_map *cpus, struct cpu_aggr_map **corep); From 49679da388f4c45b0ca444dcf8bb5f59a02f8f4e Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Tue, 4 Jan 2022 22:13:14 -0800 Subject: [PATCH 343/493] perf cpumap: Add comments to aggr_cpu_id() This code is already tested in topology.c. Reviewed-by: James Clark Signed-off-by: Ian Rogers Cc: Alexander Shishkin Cc: Andi Kleen Cc: Ingo Molnar Cc: Jiri Olsa Cc: John Garry Cc: Kajol Jain Cc: Kan Liang Cc: Leo Yan Cc: Mark Rutland Cc: Mathieu Poirier Cc: Mike Leach Cc: Namhyung Kim Cc: Paul Clarke Cc: Peter Zijlstra Cc: Riccardo Mancini Cc: Stephane Eranian Cc: Suzuki Poulouse Cc: Vineet Singh Cc: coresight@lists.linaro.org Cc: linux-arm-kernel@lists.infradead.org Cc: zhengjun.xing@intel.com Link: https://lore.kernel.org/r/20220105061351.120843-12-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/cpumap.h | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/tools/perf/util/cpumap.h b/tools/perf/util/cpumap.h index f0121dd4fdcb..edd93e1db36a 100644 --- a/tools/perf/util/cpumap.h +++ b/tools/perf/util/cpumap.h @@ -7,11 +7,20 @@ #include #include +/** Identify where counts are aggregated, -1 implies not to aggregate. */ struct aggr_cpu_id { + /** A value in the range 0 to number of threads. */ int thread; + /** The numa node X as read from /sys/devices/system/node/nodeX. */ int node; + /** + * The socket number as read from + * /sys/devices/system/cpu/cpuX/topology/physical_package_id. + */ int socket; + /** The die id as read from /sys/devices/system/cpu/cpuX/topology/die_id. */ int die; + /** The core id as read from /sys/devices/system/cpu/cpuX/topology/core_id. */ int core; }; From 63e0fa873d8820b996a01a83d832bf1b3969e9b6 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Tue, 4 Jan 2022 22:13:15 -0800 Subject: [PATCH 344/493] perf cpumap: Remove unused cpu_map__socket() Unused function so remove. Reviewed-by: James Clark Signed-off-by: Ian Rogers Cc: Alexander Shishkin Cc: Andi Kleen Cc: Ingo Molnar Cc: Jiri Olsa Cc: John Garry Cc: Kajol Jain Cc: Kan Liang Cc: Leo Yan Cc: Mark Rutland Cc: Mathieu Poirier Cc: Mike Leach Cc: Namhyung Kim Cc: Paul Clarke Cc: Peter Zijlstra Cc: Riccardo Mancini Cc: Stephane Eranian Cc: Suzuki Poulouse Cc: Vineet Singh Cc: coresight@lists.linaro.org Cc: linux-arm-kernel@lists.infradead.org Cc: zhengjun.xing@intel.com Link: https://lore.kernel.org/r/20220105061351.120843-13-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/cpumap.h | 7 ------- 1 file changed, 7 deletions(-) diff --git a/tools/perf/util/cpumap.h b/tools/perf/util/cpumap.h index edd93e1db36a..22e53fd54657 100644 --- a/tools/perf/util/cpumap.h +++ b/tools/perf/util/cpumap.h @@ -53,13 +53,6 @@ int cpu_map__build_core_map(struct perf_cpu_map *cpus, struct cpu_aggr_map **cor int cpu_map__build_node_map(struct perf_cpu_map *cpus, struct cpu_aggr_map **nodep); const struct perf_cpu_map *cpu_map__online(void); /* thread unsafe */ -static inline int cpu_map__socket(struct perf_cpu_map *sock, int s) -{ - if (!sock || s > sock->nr || s < 0) - return 0; - return sock->map[s]; -} - int cpu__setup_cpunode_map(void); int cpu__max_node(void); From 3ac23d199c2bc3bc2a2b31c803e7c5d841959670 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Tue, 4 Jan 2022 22:13:16 -0800 Subject: [PATCH 345/493] perf cpumap: Simplify equal function name Rename cpu_map__compare_aggr_cpu_id() to aggr_cpu_id__equal(), the cpu_map part of the name is misleading. Equal better describes the function than compare. Switch to const pointer rather than value as struct given the number of variables in aggr_cpu_id(). Reviewed-by: James Clark Signed-off-by: Ian Rogers Cc: Alexander Shishkin Cc: Andi Kleen Cc: Ingo Molnar Cc: Jiri Olsa Cc: John Garry Cc: Kajol Jain Cc: Kan Liang Cc: Leo Yan Cc: Mark Rutland Cc: Mathieu Poirier Cc: Mike Leach Cc: Namhyung Kim Cc: Paul Clarke Cc: Peter Zijlstra Cc: Riccardo Mancini Cc: Stephane Eranian Cc: Suzuki Poulouse Cc: Vineet Singh Cc: coresight@lists.linaro.org Cc: linux-arm-kernel@lists.infradead.org Cc: zhengjun.xing@intel.com Link: https://lore.kernel.org/r/20220105061351.120843-14-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/cpumap.c | 14 +++++++------- tools/perf/util/cpumap.h | 2 +- tools/perf/util/stat-display.c | 18 ++++++++++-------- 3 files changed, 18 insertions(+), 16 deletions(-) diff --git a/tools/perf/util/cpumap.c b/tools/perf/util/cpumap.c index f67b2e7aac13..8fa00a6221c8 100644 --- a/tools/perf/util/cpumap.c +++ b/tools/perf/util/cpumap.c @@ -171,7 +171,7 @@ int cpu_map__build_map(struct perf_cpu_map *cpus, struct cpu_aggr_map **res, for (cpu = 0; cpu < nr; cpu++) { s1 = f(cpu, data); for (s2 = 0; s2 < c->nr; s2++) { - if (cpu_map__compare_aggr_cpu_id(s1, c->map[s2])) + if (aggr_cpu_id__equal(&s1, &c->map[s2])) break; } if (s2 == c->nr) { @@ -593,13 +593,13 @@ const struct perf_cpu_map *cpu_map__online(void) /* thread unsafe */ return online; } -bool cpu_map__compare_aggr_cpu_id(struct aggr_cpu_id a, struct aggr_cpu_id b) +bool aggr_cpu_id__equal(const struct aggr_cpu_id *a, const struct aggr_cpu_id *b) { - return a.thread == b.thread && - a.node == b.node && - a.socket == b.socket && - a.die == b.die && - a.core == b.core; + return a->thread == b->thread && + a->node == b->node && + a->socket == b->socket && + a->die == b->die && + a->core == b->core; } bool cpu_map__aggr_cpu_id_is_empty(struct aggr_cpu_id a) diff --git a/tools/perf/util/cpumap.h b/tools/perf/util/cpumap.h index 22e53fd54657..652b76c69376 100644 --- a/tools/perf/util/cpumap.h +++ b/tools/perf/util/cpumap.h @@ -67,7 +67,7 @@ int cpu_map__build_map(struct perf_cpu_map *cpus, struct cpu_aggr_map **res, int cpu_map__cpu(struct perf_cpu_map *cpus, int idx); bool cpu_map__has(struct perf_cpu_map *cpus, int cpu); -bool cpu_map__compare_aggr_cpu_id(struct aggr_cpu_id a, struct aggr_cpu_id b); +bool aggr_cpu_id__equal(const struct aggr_cpu_id *a, const struct aggr_cpu_id *b); bool cpu_map__aggr_cpu_id_is_empty(struct aggr_cpu_id a); struct aggr_cpu_id cpu_map__empty_aggr_cpu_id(void); diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c index 6c40b91d5e32..0241436bb1fb 100644 --- a/tools/perf/util/stat-display.c +++ b/tools/perf/util/stat-display.c @@ -328,20 +328,22 @@ static void print_metric_header(struct perf_stat_config *config, } static int first_shadow_cpu(struct perf_stat_config *config, - struct evsel *evsel, struct aggr_cpu_id id) + struct evsel *evsel, const struct aggr_cpu_id *id) { struct perf_cpu_map *cpus; int cpu, idx; if (config->aggr_mode == AGGR_NONE) - return id.core; + return id->core; if (!config->aggr_get_id) return 0; cpus = evsel__cpus(evsel); perf_cpu_map__for_each_cpu(cpu, idx, cpus) { - if (cpu_map__compare_aggr_cpu_id(config->aggr_get_id(config, cpu), id)) + struct aggr_cpu_id cpu_id = config->aggr_get_id(config, cpu); + + if (aggr_cpu_id__equal(&cpu_id, id)) return cpu; } return 0; @@ -501,7 +503,7 @@ static void printout(struct perf_stat_config *config, struct aggr_cpu_id id, int } perf_stat__print_shadow_stats(config, counter, uval, - first_shadow_cpu(config, counter, id), + first_shadow_cpu(config, counter, &id), &out, &config->metric_events, st); if (!config->csv_output && !config->metric_only) { print_noise(config, counter, noise); @@ -525,12 +527,12 @@ static void aggr_update_shadow(struct perf_stat_config *config, val = 0; perf_cpu_map__for_each_cpu(cpu, idx, cpus) { s2 = config->aggr_get_id(config, cpu); - if (!cpu_map__compare_aggr_cpu_id(s2, id)) + if (!aggr_cpu_id__equal(&s2, &id)) continue; val += perf_counts(counter->counts, idx, 0)->val; } perf_stat__update_shadow_stats(counter, val, - first_shadow_cpu(config, counter, id), + first_shadow_cpu(config, counter, &id), &rt_stat); } } @@ -641,7 +643,7 @@ static void aggr_cb(struct perf_stat_config *config, struct perf_counts_values *counts; s2 = config->aggr_get_id(config, cpu); - if (!cpu_map__compare_aggr_cpu_id(s2, ad->id)) + if (!aggr_cpu_id__equal(&s2, &ad->id)) continue; if (first) ad->nr++; @@ -1217,7 +1219,7 @@ static void print_percore_thread(struct perf_stat_config *config, s2 = config->aggr_get_id(config, cpu); for (s = 0; s < config->aggr_map->nr; s++) { id = config->aggr_map->map[s]; - if (cpu_map__compare_aggr_cpu_id(s2, id)) + if (aggr_cpu_id__equal(&s2, &id)) break; } From 51b826fadf4fc42c8614b752b6cb0cb516589ade Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Tue, 4 Jan 2022 22:13:17 -0800 Subject: [PATCH 346/493] perf cpumap: Rename empty functions Remove cpu_map from name as a cpu_map isn't used. Pass a const pointer rather than by value to avoid unnecessary copying. Reviewed-by: James Clark Signed-off-by: Ian Rogers Cc: Alexander Shishkin Cc: Andi Kleen Cc: Ingo Molnar Cc: Jiri Olsa Cc: John Garry Cc: Kajol Jain Cc: Kan Liang Cc: Leo Yan Cc: Mark Rutland Cc: Mathieu Poirier Cc: Mike Leach Cc: Namhyung Kim Cc: Paul Clarke Cc: Peter Zijlstra Cc: Riccardo Mancini Cc: Stephane Eranian Cc: Suzuki Poulouse Cc: Vineet Singh Cc: coresight@lists.linaro.org Cc: linux-arm-kernel@lists.infradead.org Cc: zhengjun.xing@intel.com Link: https://lore.kernel.org/r/20220105061351.120843-15-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-stat.c | 12 ++++++------ tools/perf/util/cpumap.c | 24 ++++++++++++------------ tools/perf/util/cpumap.h | 4 ++-- tools/perf/util/stat-display.c | 10 +++++----- 4 files changed, 25 insertions(+), 25 deletions(-) diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 40cb3518f27e..d06921cd3592 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -1349,9 +1349,9 @@ static struct aggr_cpu_id perf_stat__get_node(struct perf_stat_config *config __ static struct aggr_cpu_id perf_stat__get_aggr(struct perf_stat_config *config, aggr_get_id_t get_id, int cpu) { - struct aggr_cpu_id id = cpu_map__empty_aggr_cpu_id(); + struct aggr_cpu_id id = aggr_cpu_id__empty(); - if (cpu_map__aggr_cpu_id_is_empty(config->cpus_aggr_map->map[cpu])) + if (aggr_cpu_id__is_empty(&config->cpus_aggr_map->map[cpu])) config->cpus_aggr_map->map[cpu] = get_id(config, cpu); id = config->cpus_aggr_map->map[cpu]; @@ -1480,7 +1480,7 @@ static void perf_stat__exit_aggr_mode(void) static struct aggr_cpu_id perf_env__get_socket_aggr_by_cpu(int cpu, void *data) { struct perf_env *env = data; - struct aggr_cpu_id id = cpu_map__empty_aggr_cpu_id(); + struct aggr_cpu_id id = aggr_cpu_id__empty(); if (cpu != -1) id.socket = env->cpu[cpu].socket_id; @@ -1491,7 +1491,7 @@ static struct aggr_cpu_id perf_env__get_socket_aggr_by_cpu(int cpu, void *data) static struct aggr_cpu_id perf_env__get_die_aggr_by_cpu(int cpu, void *data) { struct perf_env *env = data; - struct aggr_cpu_id id = cpu_map__empty_aggr_cpu_id(); + struct aggr_cpu_id id = aggr_cpu_id__empty(); if (cpu != -1) { /* @@ -1509,7 +1509,7 @@ static struct aggr_cpu_id perf_env__get_die_aggr_by_cpu(int cpu, void *data) static struct aggr_cpu_id perf_env__get_core_aggr_by_cpu(int cpu, void *data) { struct perf_env *env = data; - struct aggr_cpu_id id = cpu_map__empty_aggr_cpu_id(); + struct aggr_cpu_id id = aggr_cpu_id__empty(); if (cpu != -1) { /* @@ -1527,7 +1527,7 @@ static struct aggr_cpu_id perf_env__get_core_aggr_by_cpu(int cpu, void *data) static struct aggr_cpu_id perf_env__get_node_aggr_by_cpu(int cpu, void *data) { - struct aggr_cpu_id id = cpu_map__empty_aggr_cpu_id(); + struct aggr_cpu_id id = aggr_cpu_id__empty(); id.node = perf_env__numa_node(data, cpu); return id; diff --git a/tools/perf/util/cpumap.c b/tools/perf/util/cpumap.c index 8fa00a6221c8..b3e1304aca0c 100644 --- a/tools/perf/util/cpumap.c +++ b/tools/perf/util/cpumap.c @@ -104,7 +104,7 @@ struct cpu_aggr_map *cpu_aggr_map__empty_new(int nr) cpus->nr = nr; for (i = 0; i < nr; i++) - cpus->map[i] = cpu_map__empty_aggr_cpu_id(); + cpus->map[i] = aggr_cpu_id__empty(); refcount_set(&cpus->refcnt, 1); } @@ -130,7 +130,7 @@ int cpu_map__get_socket_id(int cpu) struct aggr_cpu_id cpu_map__get_socket_aggr_by_cpu(int cpu, void *data __maybe_unused) { - struct aggr_cpu_id id = cpu_map__empty_aggr_cpu_id(); + struct aggr_cpu_id id = aggr_cpu_id__empty(); id.socket = cpu_map__get_socket_id(cpu); return id; @@ -209,7 +209,7 @@ struct aggr_cpu_id cpu_map__get_die_aggr_by_cpu(int cpu, void *data) * make a unique ID. */ id = cpu_map__get_socket_aggr_by_cpu(cpu, data); - if (cpu_map__aggr_cpu_id_is_empty(id)) + if (aggr_cpu_id__is_empty(&id)) return id; id.die = die; @@ -234,7 +234,7 @@ struct aggr_cpu_id cpu_map__get_core_aggr_by_cpu(int cpu, void *data) /* cpu_map__get_die returns a struct with socket and die set*/ id = cpu_map__get_die_aggr_by_cpu(cpu, data); - if (cpu_map__aggr_cpu_id_is_empty(id)) + if (aggr_cpu_id__is_empty(&id)) return id; /* @@ -248,7 +248,7 @@ struct aggr_cpu_id cpu_map__get_core_aggr_by_cpu(int cpu, void *data) struct aggr_cpu_id cpu_map__get_node_aggr_by_cpu(int cpu, void *data __maybe_unused) { - struct aggr_cpu_id id = cpu_map__empty_aggr_cpu_id(); + struct aggr_cpu_id id = aggr_cpu_id__empty(); id.node = cpu_map__get_node_id(cpu); return id; @@ -602,16 +602,16 @@ bool aggr_cpu_id__equal(const struct aggr_cpu_id *a, const struct aggr_cpu_id *b a->core == b->core; } -bool cpu_map__aggr_cpu_id_is_empty(struct aggr_cpu_id a) +bool aggr_cpu_id__is_empty(const struct aggr_cpu_id *a) { - return a.thread == -1 && - a.node == -1 && - a.socket == -1 && - a.die == -1 && - a.core == -1; + return a->thread == -1 && + a->node == -1 && + a->socket == -1 && + a->die == -1 && + a->core == -1; } -struct aggr_cpu_id cpu_map__empty_aggr_cpu_id(void) +struct aggr_cpu_id aggr_cpu_id__empty(void) { struct aggr_cpu_id ret = { .thread = -1, diff --git a/tools/perf/util/cpumap.h b/tools/perf/util/cpumap.h index 652b76c69376..9589b0001a28 100644 --- a/tools/perf/util/cpumap.h +++ b/tools/perf/util/cpumap.h @@ -68,7 +68,7 @@ int cpu_map__cpu(struct perf_cpu_map *cpus, int idx); bool cpu_map__has(struct perf_cpu_map *cpus, int cpu); bool aggr_cpu_id__equal(const struct aggr_cpu_id *a, const struct aggr_cpu_id *b); -bool cpu_map__aggr_cpu_id_is_empty(struct aggr_cpu_id a); -struct aggr_cpu_id cpu_map__empty_aggr_cpu_id(void); +bool aggr_cpu_id__is_empty(const struct aggr_cpu_id *a); +struct aggr_cpu_id aggr_cpu_id__empty(void); #endif /* __PERF_CPUMAP_H */ diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c index 0241436bb1fb..870b1db71fbc 100644 --- a/tools/perf/util/stat-display.c +++ b/tools/perf/util/stat-display.c @@ -698,7 +698,7 @@ static void print_counter_aggrdata(struct perf_stat_config *config, uval = val * counter->scale; if (cpu != -1) { - id = cpu_map__empty_aggr_cpu_id(); + id = aggr_cpu_id__empty(); id.core = cpu; } printout(config, id, nr, counter, uval, @@ -780,7 +780,7 @@ static struct perf_aggr_thread_value *sort_aggr_thread( continue; buf[i].counter = counter; - buf[i].id = cpu_map__empty_aggr_cpu_id(); + buf[i].id = aggr_cpu_id__empty(); buf[i].id.thread = thread; buf[i].uval = uval; buf[i].val = val; @@ -868,7 +868,7 @@ static void print_counter_aggr(struct perf_stat_config *config, fprintf(output, "%s", prefix); uval = cd.avg * counter->scale; - printout(config, cpu_map__empty_aggr_cpu_id(), 0, counter, uval, prefix, cd.avg_running, + printout(config, aggr_cpu_id__empty(), 0, counter, uval, prefix, cd.avg_running, cd.avg_enabled, cd.avg, &rt_stat); if (!metric_only) fprintf(output, "\n"); @@ -911,7 +911,7 @@ static void print_counter(struct perf_stat_config *config, fprintf(output, "%s", prefix); uval = val * counter->scale; - id = cpu_map__empty_aggr_cpu_id(); + id = aggr_cpu_id__empty(); id.core = cpu; printout(config, id, 0, counter, uval, prefix, run, ena, 1.0, &rt_stat); @@ -938,7 +938,7 @@ static void print_no_aggr_metric(struct perf_stat_config *config, if (prefix) fputs(prefix, config->output); evlist__for_each_entry(evlist, counter) { - id = cpu_map__empty_aggr_cpu_id(); + id = aggr_cpu_id__empty(); id.core = cpu; if (first) { aggr_printout(config, counter, id, 0); From 194a3a202564153493789997643181737a6ae4b9 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Tue, 4 Jan 2022 22:13:18 -0800 Subject: [PATCH 347/493] perf cpumap: Document cpu__get_node() and remove redundant function cpu_map__get_node_id() isn't used externally and merely delegates to cpu__get_node(). Reviewed-by: James Clark Signed-off-by: Ian Rogers Cc: Alexander Shishkin Cc: Andi Kleen Cc: Ingo Molnar Cc: Jiri Olsa Cc: John Garry Cc: Kajol Jain Cc: Kan Liang Cc: Leo Yan Cc: Mark Rutland Cc: Mathieu Poirier Cc: Mike Leach Cc: Namhyung Kim Cc: Paul Clarke Cc: Peter Zijlstra Cc: Riccardo Mancini Cc: Stephane Eranian Cc: Suzuki Poulouse Cc: Vineet Singh Cc: coresight@lists.linaro.org Cc: linux-arm-kernel@lists.infradead.org Cc: zhengjun.xing@intel.com Link: https://lore.kernel.org/r/20220105061351.120843-16-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/cpumap.c | 11 +++++------ tools/perf/util/cpumap.h | 5 ++++- 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/tools/perf/util/cpumap.c b/tools/perf/util/cpumap.c index b3e1304aca0c..1626b0991408 100644 --- a/tools/perf/util/cpumap.c +++ b/tools/perf/util/cpumap.c @@ -16,6 +16,10 @@ static int max_cpu_num; static int max_present_cpu_num; static int max_node_num; +/** + * The numa node X as read from /sys/devices/system/node/nodeX indexed by the + * CPU number. + */ static int *cpunode_map; static struct perf_cpu_map *cpu_map__from_entries(struct cpu_map_entries *cpus) @@ -222,11 +226,6 @@ int cpu_map__get_core_id(int cpu) return ret ?: value; } -int cpu_map__get_node_id(int cpu) -{ - return cpu__get_node(cpu); -} - struct aggr_cpu_id cpu_map__get_core_aggr_by_cpu(int cpu, void *data) { struct aggr_cpu_id id; @@ -250,7 +249,7 @@ struct aggr_cpu_id cpu_map__get_node_aggr_by_cpu(int cpu, void *data __maybe_unu { struct aggr_cpu_id id = aggr_cpu_id__empty(); - id.node = cpu_map__get_node_id(cpu); + id.node = cpu__get_node(cpu); return id; } diff --git a/tools/perf/util/cpumap.h b/tools/perf/util/cpumap.h index 9589b0001a28..f849f01c5860 100644 --- a/tools/perf/util/cpumap.h +++ b/tools/perf/util/cpumap.h @@ -45,7 +45,6 @@ int cpu_map__get_die_id(int cpu); struct aggr_cpu_id cpu_map__get_die_aggr_by_cpu(int cpu, void *data); int cpu_map__get_core_id(int cpu); struct aggr_cpu_id cpu_map__get_core_aggr_by_cpu(int cpu, void *data); -int cpu_map__get_node_id(int cpu); struct aggr_cpu_id cpu_map__get_node_aggr_by_cpu(int cpu, void *data); int cpu_map__build_socket_map(struct perf_cpu_map *cpus, struct cpu_aggr_map **sockp); int cpu_map__build_die_map(struct perf_cpu_map *cpus, struct cpu_aggr_map **diep); @@ -58,6 +57,10 @@ int cpu__setup_cpunode_map(void); int cpu__max_node(void); int cpu__max_cpu(void); int cpu__max_present_cpu(void); +/** + * cpu__get_node - Returns the numa node X as read from + * /sys/devices/system/node/nodeX for the given CPU. + */ int cpu__get_node(int cpu); int cpu_map__build_map(struct perf_cpu_map *cpus, struct cpu_aggr_map **res, From 4e90e5cc74c6b1c1b9abff8b53cec5be1fb5e839 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Tue, 4 Jan 2022 22:13:19 -0800 Subject: [PATCH 348/493] perf cpumap: Remove map from function names that don't use a map Move to the cpu name and document for consistency. Reviewed-by: James Clark Signed-off-by: Ian Rogers Cc: Alexander Shishkin Cc: Andi Kleen Cc: Ingo Molnar Cc: Jiri Olsa Cc: John Garry Cc: Kajol Jain Cc: Kan Liang Cc: Leo Yan Cc: Mark Rutland Cc: Mathieu Poirier Cc: Mike Leach Cc: Namhyung Kim Cc: Paul Clarke Cc: Peter Zijlstra Cc: Riccardo Mancini Cc: Stephane Eranian Cc: Suzuki Poulouse Cc: Vineet Singh Cc: coresight@lists.linaro.org Cc: linux-arm-kernel@lists.infradead.org Cc: zhengjun.xing@intel.com Link: https://lore.kernel.org/r/20220105061351.120843-17-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/cpumap.c | 12 ++++++------ tools/perf/util/cpumap.h | 19 ++++++++++++++++--- tools/perf/util/env.c | 6 +++--- tools/perf/util/stat.c | 4 ++-- 4 files changed, 27 insertions(+), 14 deletions(-) diff --git a/tools/perf/util/cpumap.c b/tools/perf/util/cpumap.c index 1626b0991408..e0d7f1da5858 100644 --- a/tools/perf/util/cpumap.c +++ b/tools/perf/util/cpumap.c @@ -126,7 +126,7 @@ static int cpu__get_topology_int(int cpu, const char *name, int *value) return sysfs__read_int(path, value); } -int cpu_map__get_socket_id(int cpu) +int cpu__get_socket_id(int cpu) { int value, ret = cpu__get_topology_int(cpu, "physical_package_id", &value); return ret ?: value; @@ -136,7 +136,7 @@ struct aggr_cpu_id cpu_map__get_socket_aggr_by_cpu(int cpu, void *data __maybe_u { struct aggr_cpu_id id = aggr_cpu_id__empty(); - id.socket = cpu_map__get_socket_id(cpu); + id.socket = cpu__get_socket_id(cpu); return id; } @@ -190,7 +190,7 @@ int cpu_map__build_map(struct perf_cpu_map *cpus, struct cpu_aggr_map **res, return 0; } -int cpu_map__get_die_id(int cpu) +int cpu__get_die_id(int cpu) { int value, ret = cpu__get_topology_int(cpu, "die_id", &value); @@ -202,7 +202,7 @@ struct aggr_cpu_id cpu_map__get_die_aggr_by_cpu(int cpu, void *data) struct aggr_cpu_id id; int die; - die = cpu_map__get_die_id(cpu); + die = cpu__get_die_id(cpu); /* There is no die_id on legacy system. */ if (die == -1) die = 0; @@ -220,7 +220,7 @@ struct aggr_cpu_id cpu_map__get_die_aggr_by_cpu(int cpu, void *data) return id; } -int cpu_map__get_core_id(int cpu) +int cpu__get_core_id(int cpu) { int value, ret = cpu__get_topology_int(cpu, "core_id", &value); return ret ?: value; @@ -229,7 +229,7 @@ int cpu_map__get_core_id(int cpu) struct aggr_cpu_id cpu_map__get_core_aggr_by_cpu(int cpu, void *data) { struct aggr_cpu_id id; - int core = cpu_map__get_core_id(cpu); + int core = cpu__get_core_id(cpu); /* cpu_map__get_die returns a struct with socket and die set*/ id = cpu_map__get_die_aggr_by_cpu(cpu, data); diff --git a/tools/perf/util/cpumap.h b/tools/perf/util/cpumap.h index f849f01c5860..a053bf31a3f0 100644 --- a/tools/perf/util/cpumap.h +++ b/tools/perf/util/cpumap.h @@ -39,11 +39,8 @@ struct perf_cpu_map *cpu_map__new_data(struct perf_record_cpu_map_data *data); size_t cpu_map__snprint(struct perf_cpu_map *map, char *buf, size_t size); size_t cpu_map__snprint_mask(struct perf_cpu_map *map, char *buf, size_t size); size_t cpu_map__fprintf(struct perf_cpu_map *map, FILE *fp); -int cpu_map__get_socket_id(int cpu); struct aggr_cpu_id cpu_map__get_socket_aggr_by_cpu(int cpu, void *data); -int cpu_map__get_die_id(int cpu); struct aggr_cpu_id cpu_map__get_die_aggr_by_cpu(int cpu, void *data); -int cpu_map__get_core_id(int cpu); struct aggr_cpu_id cpu_map__get_core_aggr_by_cpu(int cpu, void *data); struct aggr_cpu_id cpu_map__get_node_aggr_by_cpu(int cpu, void *data); int cpu_map__build_socket_map(struct perf_cpu_map *cpus, struct cpu_aggr_map **sockp); @@ -62,6 +59,22 @@ int cpu__max_present_cpu(void); * /sys/devices/system/node/nodeX for the given CPU. */ int cpu__get_node(int cpu); +/** + * cpu__get_socket_id - Returns the socket number as read from + * /sys/devices/system/cpu/cpuX/topology/physical_package_id for the given CPU. + */ +int cpu__get_socket_id(int cpu); +/** + * cpu__get_die_id - Returns the die id as read from + * /sys/devices/system/cpu/cpuX/topology/die_id for the given CPU. + */ +int cpu__get_die_id(int cpu); +/** + * cpu__get_core_id - Returns the core id as read from + * /sys/devices/system/cpu/cpuX/topology/core_id for the given CPU. + */ +int cpu__get_core_id(int cpu); + int cpu_map__build_map(struct perf_cpu_map *cpus, struct cpu_aggr_map **res, struct aggr_cpu_id (*f)(int cpu, void *data), diff --git a/tools/perf/util/env.c b/tools/perf/util/env.c index b9904896eb97..fd12c0dcaefb 100644 --- a/tools/perf/util/env.c +++ b/tools/perf/util/env.c @@ -302,9 +302,9 @@ int perf_env__read_cpu_topology_map(struct perf_env *env) return -ENOMEM; for (cpu = 0; cpu < nr_cpus; ++cpu) { - env->cpu[cpu].core_id = cpu_map__get_core_id(cpu); - env->cpu[cpu].socket_id = cpu_map__get_socket_id(cpu); - env->cpu[cpu].die_id = cpu_map__get_die_id(cpu); + env->cpu[cpu].core_id = cpu__get_core_id(cpu); + env->cpu[cpu].socket_id = cpu__get_socket_id(cpu); + env->cpu[cpu].die_id = cpu__get_die_id(cpu); } env->nr_cpus_avail = nr_cpus; diff --git a/tools/perf/util/stat.c b/tools/perf/util/stat.c index 5ed99bcfe91e..5c24aca0968c 100644 --- a/tools/perf/util/stat.c +++ b/tools/perf/util/stat.c @@ -328,7 +328,7 @@ static int check_per_pkg(struct evsel *counter, if (!(vals->run && vals->ena)) return 0; - s = cpu_map__get_socket_id(cpu); + s = cpu__get_socket_id(cpu); if (s < 0) return -1; @@ -336,7 +336,7 @@ static int check_per_pkg(struct evsel *counter, * On multi-die system, die_id > 0. On no-die system, die_id = 0. * We use hashmap(socket, die) to check the used socket+die pair. */ - d = cpu_map__get_die_id(cpu); + d = cpu__get_die_id(cpu); if (d < 0) return -1; From adff2c634357115a0f94a9a5054061b497df7f72 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Tue, 4 Jan 2022 22:13:20 -0800 Subject: [PATCH 349/493] perf cpumap: Remove cpu_map__cpu(), use libperf function Switch the remaining few users of cpu_map__cpu() to perf_cpu_map__cpu() and remove the function. Reviewed-by: James Clark Signed-off-by: Ian Rogers Cc: Alexander Shishkin Cc: Andi Kleen Cc: Ingo Molnar Cc: Jiri Olsa Cc: John Garry Cc: Kajol Jain Cc: Kan Liang Cc: Leo Yan Cc: Mark Rutland Cc: Mathieu Poirier Cc: Mike Leach Cc: Namhyung Kim Cc: Paul Clarke Cc: Peter Zijlstra Cc: Riccardo Mancini Cc: Stephane Eranian Cc: Suzuki Poulouse Cc: Vineet Singh Cc: coresight@lists.linaro.org Cc: linux-arm-kernel@lists.infradead.org Cc: zhengjun.xing@intel.com Link: https://lore.kernel.org/r/20220105061351.120843-18-irogers@google.com [ Did the conversion to perf_ftrace__latency_prepare_bpf() as well, used when building with BUILD_BPF_SKEL=1 ] Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-ftrace.c | 2 +- tools/perf/util/bpf_ftrace.c | 2 +- tools/perf/util/cpumap.c | 9 ++------- tools/perf/util/cpumap.h | 1 - 4 files changed, 4 insertions(+), 10 deletions(-) diff --git a/tools/perf/builtin-ftrace.c b/tools/perf/builtin-ftrace.c index 2b54e2ddc80a..f16c39a37a52 100644 --- a/tools/perf/builtin-ftrace.c +++ b/tools/perf/builtin-ftrace.c @@ -281,7 +281,7 @@ static int set_tracing_cpumask(struct perf_cpu_map *cpumap) int ret; int last_cpu; - last_cpu = cpu_map__cpu(cpumap, cpumap->nr - 1); + last_cpu = perf_cpu_map__cpu(cpumap, cpumap->nr - 1); mask_size = last_cpu / 4 + 2; /* one more byte for EOS */ mask_size += last_cpu / 32; /* ',' is needed for every 32th cpus */ diff --git a/tools/perf/util/bpf_ftrace.c b/tools/perf/util/bpf_ftrace.c index f00a2de6778c..28dc4c60c788 100644 --- a/tools/perf/util/bpf_ftrace.c +++ b/tools/perf/util/bpf_ftrace.c @@ -63,7 +63,7 @@ int perf_ftrace__latency_prepare_bpf(struct perf_ftrace *ftrace) fd = bpf_map__fd(skel->maps.cpu_filter); for (i = 0; i < ncpus; i++) { - cpu = cpu_map__cpu(ftrace->evlist->core.cpus, i); + cpu = perf_cpu_map__cpu(ftrace->evlist->core.cpus, i); bpf_map_update_elem(fd, &cpu, &val, BPF_ANY); } } diff --git a/tools/perf/util/cpumap.c b/tools/perf/util/cpumap.c index e0d7f1da5858..32f9fc2dd389 100644 --- a/tools/perf/util/cpumap.c +++ b/tools/perf/util/cpumap.c @@ -485,11 +485,6 @@ bool cpu_map__has(struct perf_cpu_map *cpus, int cpu) return perf_cpu_map__idx(cpus, cpu) != -1; } -int cpu_map__cpu(struct perf_cpu_map *cpus, int idx) -{ - return cpus->map[idx]; -} - size_t cpu_map__snprint(struct perf_cpu_map *map, char *buf, size_t size) { int i, cpu, start = -1; @@ -547,7 +542,7 @@ size_t cpu_map__snprint_mask(struct perf_cpu_map *map, char *buf, size_t size) int i, cpu; char *ptr = buf; unsigned char *bitmap; - int last_cpu = cpu_map__cpu(map, map->nr - 1); + int last_cpu = perf_cpu_map__cpu(map, map->nr - 1); if (buf == NULL) return 0; @@ -559,7 +554,7 @@ size_t cpu_map__snprint_mask(struct perf_cpu_map *map, char *buf, size_t size) } for (i = 0; i < map->nr; i++) { - cpu = cpu_map__cpu(map, i); + cpu = perf_cpu_map__cpu(map, i); bitmap[cpu / 8] |= 1 << (cpu % 8); } diff --git a/tools/perf/util/cpumap.h b/tools/perf/util/cpumap.h index a053bf31a3f0..87545bcd461d 100644 --- a/tools/perf/util/cpumap.h +++ b/tools/perf/util/cpumap.h @@ -80,7 +80,6 @@ int cpu_map__build_map(struct perf_cpu_map *cpus, struct cpu_aggr_map **res, struct aggr_cpu_id (*f)(int cpu, void *data), void *data); -int cpu_map__cpu(struct perf_cpu_map *cpus, int idx); bool cpu_map__has(struct perf_cpu_map *cpus, int cpu); bool aggr_cpu_id__equal(const struct aggr_cpu_id *a, const struct aggr_cpu_id *b); From 5f50e15c1510c77b37e10c6b22912bf4bf11476b Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Tue, 4 Jan 2022 22:13:21 -0800 Subject: [PATCH 350/493] perf cpumap: Refactor cpu_map__build_map() Turn it into a cpu_aggr_map__new(). Pass helper functions. Refactor builtin-stat calls to manually pass function pointers. Try to reduce some copy-paste code. Reviewed-by: James Clark Signed-off-by: Ian Rogers Cc: Alexander Shishkin Cc: Andi Kleen Cc: Ingo Molnar Cc: Jiri Olsa Cc: John Garry Cc: Kajol Jain Cc: Kan Liang Cc: Leo Yan Cc: Mark Rutland Cc: Mathieu Poirier Cc: Mike Leach Cc: Namhyung Kim Cc: Paul Clarke Cc: Peter Zijlstra Cc: Riccardo Mancini Cc: Stephane Eranian Cc: Suzuki Poulouse Cc: Vineet Singh Cc: coresight@lists.linaro.org Cc: linux-arm-kernel@lists.infradead.org Cc: zhengjun.xing@intel.com Link: https://lore.kernel.org/r/20220105061351.120843-19-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-stat.c | 195 ++++++++++++++++++++------------------ tools/perf/util/cpumap.c | 61 +++++------- tools/perf/util/cpumap.h | 16 ++-- 3 files changed, 135 insertions(+), 137 deletions(-) diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index d06921cd3592..c55a7fee22bc 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -1322,6 +1322,17 @@ static struct option stat_options[] = { OPT_END() }; +static const char *const aggr_mode__string[] = { + [AGGR_CORE] = "core", + [AGGR_DIE] = "die", + [AGGR_GLOBAL] = "global", + [AGGR_NODE] = "node", + [AGGR_NONE] = "none", + [AGGR_SOCKET] = "socket", + [AGGR_THREAD] = "thread", + [AGGR_UNSET] = "unset", +}; + static struct aggr_cpu_id perf_stat__get_socket(struct perf_stat_config *config __maybe_unused, int cpu) { @@ -1394,54 +1405,67 @@ static bool term_percore_set(void) return false; } -static int perf_stat_init_aggr_mode(void) +static aggr_cpu_id_get_t aggr_mode__get_aggr(enum aggr_mode aggr_mode) { - int nr; - - switch (stat_config.aggr_mode) { + switch (aggr_mode) { case AGGR_SOCKET: - if (cpu_map__build_socket_map(evsel_list->core.cpus, &stat_config.aggr_map)) { - perror("cannot build socket map"); - return -1; - } - stat_config.aggr_get_id = perf_stat__get_socket_cached; - break; + return cpu_map__get_socket_aggr_by_cpu; case AGGR_DIE: - if (cpu_map__build_die_map(evsel_list->core.cpus, &stat_config.aggr_map)) { - perror("cannot build die map"); - return -1; - } - stat_config.aggr_get_id = perf_stat__get_die_cached; - break; + return cpu_map__get_die_aggr_by_cpu; case AGGR_CORE: - if (cpu_map__build_core_map(evsel_list->core.cpus, &stat_config.aggr_map)) { - perror("cannot build core map"); - return -1; - } - stat_config.aggr_get_id = perf_stat__get_core_cached; - break; + return cpu_map__get_core_aggr_by_cpu; case AGGR_NODE: - if (cpu_map__build_node_map(evsel_list->core.cpus, &stat_config.aggr_map)) { - perror("cannot build core map"); - return -1; - } - stat_config.aggr_get_id = perf_stat__get_node_cached; - break; + return cpu_map__get_node_aggr_by_cpu; case AGGR_NONE: - if (term_percore_set()) { - if (cpu_map__build_core_map(evsel_list->core.cpus, - &stat_config.aggr_map)) { - perror("cannot build core map"); - return -1; - } - stat_config.aggr_get_id = perf_stat__get_core_cached; - } - break; + if (term_percore_set()) + return cpu_map__get_core_aggr_by_cpu; + + return NULL; case AGGR_GLOBAL: case AGGR_THREAD: case AGGR_UNSET: default: - break; + return NULL; + } +} + +static aggr_get_id_t aggr_mode__get_id(enum aggr_mode aggr_mode) +{ + switch (aggr_mode) { + case AGGR_SOCKET: + return perf_stat__get_socket_cached; + case AGGR_DIE: + return perf_stat__get_die_cached; + case AGGR_CORE: + return perf_stat__get_core_cached; + case AGGR_NODE: + return perf_stat__get_node_cached; + case AGGR_NONE: + if (term_percore_set()) { + return perf_stat__get_core_cached; + } + return NULL; + case AGGR_GLOBAL: + case AGGR_THREAD: + case AGGR_UNSET: + default: + return NULL; + } +} + +static int perf_stat_init_aggr_mode(void) +{ + int nr; + aggr_cpu_id_get_t get_id = aggr_mode__get_aggr(stat_config.aggr_mode); + + if (get_id) { + stat_config.aggr_map = cpu_aggr_map__new(evsel_list->core.cpus, + get_id, /*data=*/NULL); + if (!stat_config.aggr_map) { + pr_err("cannot build %s map", aggr_mode__string[stat_config.aggr_mode]); + return -1; + } + stat_config.aggr_get_id = aggr_mode__get_id(stat_config.aggr_mode); } /* @@ -1533,30 +1557,6 @@ static struct aggr_cpu_id perf_env__get_node_aggr_by_cpu(int cpu, void *data) return id; } -static int perf_env__build_socket_map(struct perf_env *env, struct perf_cpu_map *cpus, - struct cpu_aggr_map **sockp) -{ - return cpu_map__build_map(cpus, sockp, perf_env__get_socket_aggr_by_cpu, env); -} - -static int perf_env__build_die_map(struct perf_env *env, struct perf_cpu_map *cpus, - struct cpu_aggr_map **diep) -{ - return cpu_map__build_map(cpus, diep, perf_env__get_die_aggr_by_cpu, env); -} - -static int perf_env__build_core_map(struct perf_env *env, struct perf_cpu_map *cpus, - struct cpu_aggr_map **corep) -{ - return cpu_map__build_map(cpus, corep, perf_env__get_core_aggr_by_cpu, env); -} - -static int perf_env__build_node_map(struct perf_env *env, struct perf_cpu_map *cpus, - struct cpu_aggr_map **nodep) -{ - return cpu_map__build_map(cpus, nodep, perf_env__get_node_aggr_by_cpu, env); -} - static struct aggr_cpu_id perf_stat__get_socket_file(struct perf_stat_config *config __maybe_unused, int cpu) { @@ -1580,47 +1580,60 @@ static struct aggr_cpu_id perf_stat__get_node_file(struct perf_stat_config *conf return perf_env__get_node_aggr_by_cpu(cpu, &perf_stat.session->header.env); } -static int perf_stat_init_aggr_mode_file(struct perf_stat *st) +static aggr_cpu_id_get_t aggr_mode__get_aggr_file(enum aggr_mode aggr_mode) { - struct perf_env *env = &st->session->header.env; - - switch (stat_config.aggr_mode) { + switch (aggr_mode) { case AGGR_SOCKET: - if (perf_env__build_socket_map(env, evsel_list->core.cpus, &stat_config.aggr_map)) { - perror("cannot build socket map"); - return -1; - } - stat_config.aggr_get_id = perf_stat__get_socket_file; - break; + return perf_env__get_socket_aggr_by_cpu; case AGGR_DIE: - if (perf_env__build_die_map(env, evsel_list->core.cpus, &stat_config.aggr_map)) { - perror("cannot build die map"); - return -1; - } - stat_config.aggr_get_id = perf_stat__get_die_file; - break; + return perf_env__get_die_aggr_by_cpu; case AGGR_CORE: - if (perf_env__build_core_map(env, evsel_list->core.cpus, &stat_config.aggr_map)) { - perror("cannot build core map"); - return -1; - } - stat_config.aggr_get_id = perf_stat__get_core_file; - break; + return perf_env__get_core_aggr_by_cpu; case AGGR_NODE: - if (perf_env__build_node_map(env, evsel_list->core.cpus, &stat_config.aggr_map)) { - perror("cannot build core map"); - return -1; - } - stat_config.aggr_get_id = perf_stat__get_node_file; - break; + return perf_env__get_node_aggr_by_cpu; case AGGR_NONE: case AGGR_GLOBAL: case AGGR_THREAD: case AGGR_UNSET: default: - break; + return NULL; } +} +static aggr_get_id_t aggr_mode__get_id_file(enum aggr_mode aggr_mode) +{ + switch (aggr_mode) { + case AGGR_SOCKET: + return perf_stat__get_socket_file; + case AGGR_DIE: + return perf_stat__get_die_file; + case AGGR_CORE: + return perf_stat__get_core_file; + case AGGR_NODE: + return perf_stat__get_node_file; + case AGGR_NONE: + case AGGR_GLOBAL: + case AGGR_THREAD: + case AGGR_UNSET: + default: + return NULL; + } +} + +static int perf_stat_init_aggr_mode_file(struct perf_stat *st) +{ + struct perf_env *env = &st->session->header.env; + aggr_cpu_id_get_t get_id = aggr_mode__get_aggr_file(stat_config.aggr_mode); + + if (!get_id) + return 0; + + stat_config.aggr_map = cpu_aggr_map__new(evsel_list->core.cpus, get_id, env); + if (!stat_config.aggr_map) { + pr_err("cannot build %s map", aggr_mode__string[stat_config.aggr_mode]); + return -1; + } + stat_config.aggr_get_id = aggr_mode__get_id_file(stat_config.aggr_mode); return 0; } diff --git a/tools/perf/util/cpumap.c b/tools/perf/util/cpumap.c index 32f9fc2dd389..c8f9b3f15759 100644 --- a/tools/perf/util/cpumap.c +++ b/tools/perf/util/cpumap.c @@ -140,7 +140,7 @@ struct aggr_cpu_id cpu_map__get_socket_aggr_by_cpu(int cpu, void *data __maybe_u return id; } -static int cmp_aggr_cpu_id(const void *a_pointer, const void *b_pointer) +static int aggr_cpu_id__cmp(const void *a_pointer, const void *b_pointer) { struct aggr_cpu_id *a = (struct aggr_cpu_id *)a_pointer; struct aggr_cpu_id *b = (struct aggr_cpu_id *)b_pointer; @@ -157,37 +157,40 @@ static int cmp_aggr_cpu_id(const void *a_pointer, const void *b_pointer) return a->thread - b->thread; } -int cpu_map__build_map(struct perf_cpu_map *cpus, struct cpu_aggr_map **res, - struct aggr_cpu_id (*f)(int cpu, void *data), - void *data) +struct cpu_aggr_map *cpu_aggr_map__new(const struct perf_cpu_map *cpus, + aggr_cpu_id_get_t get_id, + void *data) { - int nr = cpus->nr; - struct cpu_aggr_map *c = cpu_aggr_map__empty_new(nr); - int cpu, s2; - struct aggr_cpu_id s1; + int cpu, idx; + struct cpu_aggr_map *c = cpu_aggr_map__empty_new(cpus->nr); if (!c) - return -1; + return NULL; /* Reset size as it may only be partially filled */ c->nr = 0; - for (cpu = 0; cpu < nr; cpu++) { - s1 = f(cpu, data); - for (s2 = 0; s2 < c->nr; s2++) { - if (aggr_cpu_id__equal(&s1, &c->map[s2])) + perf_cpu_map__for_each_cpu(cpu, idx, cpus) { + bool duplicate = false; + struct aggr_cpu_id cpu_id = get_id(cpu, data); + + for (int j = 0; j < c->nr; j++) { + if (aggr_cpu_id__equal(&cpu_id, &c->map[j])) { + duplicate = true; break; + } } - if (s2 == c->nr) { - c->map[c->nr] = s1; + if (!duplicate) { + c->map[c->nr] = cpu_id; c->nr++; } } - /* ensure we process id in increasing order */ - qsort(c->map, c->nr, sizeof(struct aggr_cpu_id), cmp_aggr_cpu_id); - *res = c; - return 0; + /* ensure we process id in increasing order */ + qsort(c->map, c->nr, sizeof(struct aggr_cpu_id), aggr_cpu_id__cmp); + + return c; + } int cpu__get_die_id(int cpu) @@ -253,26 +256,6 @@ struct aggr_cpu_id cpu_map__get_node_aggr_by_cpu(int cpu, void *data __maybe_unu return id; } -int cpu_map__build_socket_map(struct perf_cpu_map *cpus, struct cpu_aggr_map **sockp) -{ - return cpu_map__build_map(cpus, sockp, cpu_map__get_socket_aggr_by_cpu, NULL); -} - -int cpu_map__build_die_map(struct perf_cpu_map *cpus, struct cpu_aggr_map **diep) -{ - return cpu_map__build_map(cpus, diep, cpu_map__get_die_aggr_by_cpu, NULL); -} - -int cpu_map__build_core_map(struct perf_cpu_map *cpus, struct cpu_aggr_map **corep) -{ - return cpu_map__build_map(cpus, corep, cpu_map__get_core_aggr_by_cpu, NULL); -} - -int cpu_map__build_node_map(struct perf_cpu_map *cpus, struct cpu_aggr_map **numap) -{ - return cpu_map__build_map(cpus, numap, cpu_map__get_node_aggr_by_cpu, NULL); -} - /* setup simple routines to easily access node numbers given a cpu number */ static int get_max_num(char *path, int *max) { diff --git a/tools/perf/util/cpumap.h b/tools/perf/util/cpumap.h index 87545bcd461d..611048e2a592 100644 --- a/tools/perf/util/cpumap.h +++ b/tools/perf/util/cpumap.h @@ -43,10 +43,6 @@ struct aggr_cpu_id cpu_map__get_socket_aggr_by_cpu(int cpu, void *data); struct aggr_cpu_id cpu_map__get_die_aggr_by_cpu(int cpu, void *data); struct aggr_cpu_id cpu_map__get_core_aggr_by_cpu(int cpu, void *data); struct aggr_cpu_id cpu_map__get_node_aggr_by_cpu(int cpu, void *data); -int cpu_map__build_socket_map(struct perf_cpu_map *cpus, struct cpu_aggr_map **sockp); -int cpu_map__build_die_map(struct perf_cpu_map *cpus, struct cpu_aggr_map **diep); -int cpu_map__build_core_map(struct perf_cpu_map *cpus, struct cpu_aggr_map **corep); -int cpu_map__build_node_map(struct perf_cpu_map *cpus, struct cpu_aggr_map **nodep); const struct perf_cpu_map *cpu_map__online(void); /* thread unsafe */ int cpu__setup_cpunode_map(void); @@ -75,10 +71,16 @@ int cpu__get_die_id(int cpu); */ int cpu__get_core_id(int cpu); +typedef struct aggr_cpu_id (*aggr_cpu_id_get_t)(int cpu, void *data); -int cpu_map__build_map(struct perf_cpu_map *cpus, struct cpu_aggr_map **res, - struct aggr_cpu_id (*f)(int cpu, void *data), - void *data); +/** + * cpu_aggr_map__new - Create a cpu_aggr_map with an aggr_cpu_id for each cpu in + * cpus. The aggr_cpu_id is created with 'get_id' that may have a data value + * passed to it. The cpu_aggr_map is sorted with duplicate values removed. + */ +struct cpu_aggr_map *cpu_aggr_map__new(const struct perf_cpu_map *cpus, + aggr_cpu_id_get_t get_id, + void *data); bool cpu_map__has(struct perf_cpu_map *cpus, int cpu); From 973aeb3c7ada35b75442126c745bb6074cb3e172 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Tue, 4 Jan 2022 22:13:22 -0800 Subject: [PATCH 351/493] perf cpumap: Rename cpu_map__get_X_aggr_by_cpu functions The functions don't use a cpu_map so reduce them to being like constructors of aggr_cpu_id. Reviewed-by: James Clark Signed-off-by: Ian Rogers Cc: Alexander Shishkin Cc: Andi Kleen Cc: Ingo Molnar Cc: Jiri Olsa Cc: John Garry Cc: Kajol Jain Cc: Kan Liang Cc: Leo Yan Cc: Mark Rutland Cc: Mathieu Poirier Cc: Mike Leach Cc: Namhyung Kim Cc: Paul Clarke Cc: Peter Zijlstra Cc: Riccardo Mancini Cc: Stephane Eranian Cc: Suzuki Poulouse Cc: Vineet Singh Cc: coresight@lists.linaro.org Cc: linux-arm-kernel@lists.infradead.org Cc: zhengjun.xing@intel.com Link: https://lore.kernel.org/r/20220105061351.120843-20-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-stat.c | 18 +++++++++--------- tools/perf/tests/topology.c | 8 ++++---- tools/perf/util/cpumap.c | 14 +++++++------- tools/perf/util/cpumap.h | 29 +++++++++++++++++++++++++---- 4 files changed, 45 insertions(+), 24 deletions(-) diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index c55a7fee22bc..a518fcf0b3f8 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -1336,25 +1336,25 @@ static const char *const aggr_mode__string[] = { static struct aggr_cpu_id perf_stat__get_socket(struct perf_stat_config *config __maybe_unused, int cpu) { - return cpu_map__get_socket_aggr_by_cpu(cpu, /*data=*/NULL); + return aggr_cpu_id__socket(cpu, /*data=*/NULL); } static struct aggr_cpu_id perf_stat__get_die(struct perf_stat_config *config __maybe_unused, int cpu) { - return cpu_map__get_die_aggr_by_cpu(cpu, /*data=*/NULL); + return aggr_cpu_id__die(cpu, /*data=*/NULL); } static struct aggr_cpu_id perf_stat__get_core(struct perf_stat_config *config __maybe_unused, int cpu) { - return cpu_map__get_core_aggr_by_cpu(cpu, /*data=*/NULL); + return aggr_cpu_id__core(cpu, /*data=*/NULL); } static struct aggr_cpu_id perf_stat__get_node(struct perf_stat_config *config __maybe_unused, int cpu) { - return cpu_map__get_node_aggr_by_cpu(cpu, /*data=*/NULL); + return aggr_cpu_id__node(cpu, /*data=*/NULL); } static struct aggr_cpu_id perf_stat__get_aggr(struct perf_stat_config *config, @@ -1409,16 +1409,16 @@ static aggr_cpu_id_get_t aggr_mode__get_aggr(enum aggr_mode aggr_mode) { switch (aggr_mode) { case AGGR_SOCKET: - return cpu_map__get_socket_aggr_by_cpu; + return aggr_cpu_id__socket; case AGGR_DIE: - return cpu_map__get_die_aggr_by_cpu; + return aggr_cpu_id__die; case AGGR_CORE: - return cpu_map__get_core_aggr_by_cpu; + return aggr_cpu_id__core; case AGGR_NODE: - return cpu_map__get_node_aggr_by_cpu; + return aggr_cpu_id__node; case AGGR_NONE: if (term_percore_set()) - return cpu_map__get_core_aggr_by_cpu; + return aggr_cpu_id__core; return NULL; case AGGR_GLOBAL: diff --git a/tools/perf/tests/topology.c b/tools/perf/tests/topology.c index 5992b323c4f5..0cb7b015b4b9 100644 --- a/tools/perf/tests/topology.c +++ b/tools/perf/tests/topology.c @@ -121,7 +121,7 @@ static int check_cpu_topology(char *path, struct perf_cpu_map *map) // Test that core ID contains socket, die and core for (i = 0; i < map->nr; i++) { - id = cpu_map__get_core_aggr_by_cpu(perf_cpu_map__cpu(map, i), NULL); + id = aggr_cpu_id__core(perf_cpu_map__cpu(map, i), NULL); TEST_ASSERT_VAL("Core map - Core ID doesn't match", session->header.env.cpu[map->map[i]].core_id == id.core); @@ -136,7 +136,7 @@ static int check_cpu_topology(char *path, struct perf_cpu_map *map) // Test that die ID contains socket and die for (i = 0; i < map->nr; i++) { - id = cpu_map__get_die_aggr_by_cpu(perf_cpu_map__cpu(map, i), NULL); + id = aggr_cpu_id__die(perf_cpu_map__cpu(map, i), NULL); TEST_ASSERT_VAL("Die map - Socket ID doesn't match", session->header.env.cpu[map->map[i]].socket_id == id.socket); @@ -150,7 +150,7 @@ static int check_cpu_topology(char *path, struct perf_cpu_map *map) // Test that socket ID contains only socket for (i = 0; i < map->nr; i++) { - id = cpu_map__get_socket_aggr_by_cpu(perf_cpu_map__cpu(map, i), NULL); + id = aggr_cpu_id__socket(perf_cpu_map__cpu(map, i), NULL); TEST_ASSERT_VAL("Socket map - Socket ID doesn't match", session->header.env.cpu[map->map[i]].socket_id == id.socket); @@ -162,7 +162,7 @@ static int check_cpu_topology(char *path, struct perf_cpu_map *map) // Test that node ID contains only node for (i = 0; i < map->nr; i++) { - id = cpu_map__get_node_aggr_by_cpu(perf_cpu_map__cpu(map, i), NULL); + id = aggr_cpu_id__node(perf_cpu_map__cpu(map, i), NULL); TEST_ASSERT_VAL("Node map - Node ID doesn't match", cpu__get_node(map->map[i]) == id.node); TEST_ASSERT_VAL("Node map - Socket is set", id.socket == -1); diff --git a/tools/perf/util/cpumap.c b/tools/perf/util/cpumap.c index c8f9b3f15759..19e502cc65e7 100644 --- a/tools/perf/util/cpumap.c +++ b/tools/perf/util/cpumap.c @@ -132,7 +132,7 @@ int cpu__get_socket_id(int cpu) return ret ?: value; } -struct aggr_cpu_id cpu_map__get_socket_aggr_by_cpu(int cpu, void *data __maybe_unused) +struct aggr_cpu_id aggr_cpu_id__socket(int cpu, void *data __maybe_unused) { struct aggr_cpu_id id = aggr_cpu_id__empty(); @@ -200,7 +200,7 @@ int cpu__get_die_id(int cpu) return ret ?: value; } -struct aggr_cpu_id cpu_map__get_die_aggr_by_cpu(int cpu, void *data) +struct aggr_cpu_id aggr_cpu_id__die(int cpu, void *data) { struct aggr_cpu_id id; int die; @@ -215,7 +215,7 @@ struct aggr_cpu_id cpu_map__get_die_aggr_by_cpu(int cpu, void *data) * with the socket ID and then add die to * make a unique ID. */ - id = cpu_map__get_socket_aggr_by_cpu(cpu, data); + id = aggr_cpu_id__socket(cpu, data); if (aggr_cpu_id__is_empty(&id)) return id; @@ -229,13 +229,13 @@ int cpu__get_core_id(int cpu) return ret ?: value; } -struct aggr_cpu_id cpu_map__get_core_aggr_by_cpu(int cpu, void *data) +struct aggr_cpu_id aggr_cpu_id__core(int cpu, void *data) { struct aggr_cpu_id id; int core = cpu__get_core_id(cpu); - /* cpu_map__get_die returns a struct with socket and die set*/ - id = cpu_map__get_die_aggr_by_cpu(cpu, data); + /* aggr_cpu_id__die returns a struct with socket and die set*/ + id = aggr_cpu_id__die(cpu, data); if (aggr_cpu_id__is_empty(&id)) return id; @@ -248,7 +248,7 @@ struct aggr_cpu_id cpu_map__get_core_aggr_by_cpu(int cpu, void *data) } -struct aggr_cpu_id cpu_map__get_node_aggr_by_cpu(int cpu, void *data __maybe_unused) +struct aggr_cpu_id aggr_cpu_id__node(int cpu, void *data __maybe_unused) { struct aggr_cpu_id id = aggr_cpu_id__empty(); diff --git a/tools/perf/util/cpumap.h b/tools/perf/util/cpumap.h index 611048e2a592..ecd658293a2d 100644 --- a/tools/perf/util/cpumap.h +++ b/tools/perf/util/cpumap.h @@ -39,10 +39,6 @@ struct perf_cpu_map *cpu_map__new_data(struct perf_record_cpu_map_data *data); size_t cpu_map__snprint(struct perf_cpu_map *map, char *buf, size_t size); size_t cpu_map__snprint_mask(struct perf_cpu_map *map, char *buf, size_t size); size_t cpu_map__fprintf(struct perf_cpu_map *map, FILE *fp); -struct aggr_cpu_id cpu_map__get_socket_aggr_by_cpu(int cpu, void *data); -struct aggr_cpu_id cpu_map__get_die_aggr_by_cpu(int cpu, void *data); -struct aggr_cpu_id cpu_map__get_core_aggr_by_cpu(int cpu, void *data); -struct aggr_cpu_id cpu_map__get_node_aggr_by_cpu(int cpu, void *data); const struct perf_cpu_map *cpu_map__online(void); /* thread unsafe */ int cpu__setup_cpunode_map(void); @@ -88,4 +84,29 @@ bool aggr_cpu_id__equal(const struct aggr_cpu_id *a, const struct aggr_cpu_id *b bool aggr_cpu_id__is_empty(const struct aggr_cpu_id *a); struct aggr_cpu_id aggr_cpu_id__empty(void); + +/** + * aggr_cpu_id__socket - Create an aggr_cpu_id with the socket populated with + * the socket for cpu. The function signature is compatible with + * aggr_cpu_id_get_t. + */ +struct aggr_cpu_id aggr_cpu_id__socket(int cpu, void *data); +/** + * aggr_cpu_id__die - Create an aggr_cpu_id with the die and socket populated + * with the die and socket for cpu. The function signature is compatible with + * aggr_cpu_id_get_t. + */ +struct aggr_cpu_id aggr_cpu_id__die(int cpu, void *data); +/** + * aggr_cpu_id__core - Create an aggr_cpu_id with the core, die and socket + * populated with the core, die and socket for cpu. The function signature is + * compatible with aggr_cpu_id_get_t. + */ +struct aggr_cpu_id aggr_cpu_id__core(int cpu, void *data); +/** + * aggr_cpu_id__node - Create an aggr_cpu_id with the numa node populated for + * cpu. The function signature is compatible with aggr_cpu_id_get_t. + */ +struct aggr_cpu_id aggr_cpu_id__node(int cpu, void *data); + #endif /* __PERF_CPUMAP_H */ From dfc66beff7fa95b9eb507ccb48fb325569bc2f74 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Tue, 4 Jan 2022 22:13:23 -0800 Subject: [PATCH 352/493] perf cpumap: Move 'has' function to libperf Make the cpu map argument const for consistency with the rest of the API. Modify cpu_map__idx accordingly. Reviewed-by: James Clark Signed-off-by: Ian Rogers Cc: Alexander Shishkin Cc: Andi Kleen Cc: Ingo Molnar Cc: Jiri Olsa Cc: John Garry Cc: Kajol Jain Cc: Kan Liang Cc: Leo Yan Cc: Mark Rutland Cc: Mathieu Poirier Cc: Mike Leach Cc: Namhyung Kim Cc: Paul Clarke Cc: Peter Zijlstra Cc: Riccardo Mancini Cc: Stephane Eranian Cc: Suzuki Poulouse Cc: Vineet Singh Cc: coresight@lists.linaro.org Cc: linux-arm-kernel@lists.infradead.org Cc: zhengjun.xing@intel.com Link: https://lore.kernel.org/r/20220105061351.120843-21-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/lib/perf/Documentation/libperf.txt | 1 + tools/lib/perf/cpumap.c | 7 ++++++- tools/lib/perf/include/internal/cpumap.h | 2 +- tools/lib/perf/include/perf/cpumap.h | 1 + tools/lib/perf/libperf.map | 1 + tools/perf/arch/arm/util/cs-etm.c | 16 ++++++++-------- tools/perf/builtin-sched.c | 6 +++--- tools/perf/tests/topology.c | 2 +- tools/perf/util/cpumap.c | 5 ----- tools/perf/util/cpumap.h | 2 -- tools/perf/util/cputopo.c | 2 +- 11 files changed, 23 insertions(+), 22 deletions(-) diff --git a/tools/lib/perf/Documentation/libperf.txt b/tools/lib/perf/Documentation/libperf.txt index 63ae5e0195ce..faef9ba3a540 100644 --- a/tools/lib/perf/Documentation/libperf.txt +++ b/tools/lib/perf/Documentation/libperf.txt @@ -48,6 +48,7 @@ SYNOPSIS int perf_cpu_map__nr(const struct perf_cpu_map *cpus); bool perf_cpu_map__empty(const struct perf_cpu_map *map); int perf_cpu_map__max(struct perf_cpu_map *map); + bool perf_cpu_map__has(const struct perf_cpu_map *map, int cpu); #define perf_cpu_map__for_each_cpu(cpu, idx, cpus) -- diff --git a/tools/lib/perf/cpumap.c b/tools/lib/perf/cpumap.c index adaad3dddf6e..3c36a06771af 100644 --- a/tools/lib/perf/cpumap.c +++ b/tools/lib/perf/cpumap.c @@ -268,7 +268,7 @@ bool perf_cpu_map__empty(const struct perf_cpu_map *map) return map ? map->map[0] == -1 : true; } -int perf_cpu_map__idx(struct perf_cpu_map *cpus, int cpu) +int perf_cpu_map__idx(const struct perf_cpu_map *cpus, int cpu) { int low = 0, high = cpus->nr; @@ -288,6 +288,11 @@ int perf_cpu_map__idx(struct perf_cpu_map *cpus, int cpu) return -1; } +bool perf_cpu_map__has(const struct perf_cpu_map *cpus, int cpu) +{ + return perf_cpu_map__idx(cpus, cpu) != -1; +} + int perf_cpu_map__max(struct perf_cpu_map *map) { // cpu_map__trim_new() qsort()s it, cpu_map__default_new() sorts it as well. diff --git a/tools/lib/perf/include/internal/cpumap.h b/tools/lib/perf/include/internal/cpumap.h index 4054169c12c5..71a31ed738c9 100644 --- a/tools/lib/perf/include/internal/cpumap.h +++ b/tools/lib/perf/include/internal/cpumap.h @@ -23,6 +23,6 @@ struct perf_cpu_map { #define MAX_NR_CPUS 2048 #endif -int perf_cpu_map__idx(struct perf_cpu_map *cpus, int cpu); +int perf_cpu_map__idx(const struct perf_cpu_map *cpus, int cpu); #endif /* __LIBPERF_INTERNAL_CPUMAP_H */ diff --git a/tools/lib/perf/include/perf/cpumap.h b/tools/lib/perf/include/perf/cpumap.h index 7c27766ea0bf..3f1c0afa3ccd 100644 --- a/tools/lib/perf/include/perf/cpumap.h +++ b/tools/lib/perf/include/perf/cpumap.h @@ -20,6 +20,7 @@ LIBPERF_API int perf_cpu_map__cpu(const struct perf_cpu_map *cpus, int idx); LIBPERF_API int perf_cpu_map__nr(const struct perf_cpu_map *cpus); LIBPERF_API bool perf_cpu_map__empty(const struct perf_cpu_map *map); LIBPERF_API int perf_cpu_map__max(struct perf_cpu_map *map); +LIBPERF_API bool perf_cpu_map__has(const struct perf_cpu_map *map, int cpu); #define perf_cpu_map__for_each_cpu(cpu, idx, cpus) \ for ((idx) = 0, (cpu) = perf_cpu_map__cpu(cpus, idx); \ diff --git a/tools/lib/perf/libperf.map b/tools/lib/perf/libperf.map index 5979bf92d98f..93696affda2e 100644 --- a/tools/lib/perf/libperf.map +++ b/tools/lib/perf/libperf.map @@ -10,6 +10,7 @@ LIBPERF_0.0.1 { perf_cpu_map__cpu; perf_cpu_map__empty; perf_cpu_map__max; + perf_cpu_map__has; perf_thread_map__new_dummy; perf_thread_map__set_pid; perf_thread_map__comm; diff --git a/tools/perf/arch/arm/util/cs-etm.c b/tools/perf/arch/arm/util/cs-etm.c index 8a3d54a86c9c..129c0272d65b 100644 --- a/tools/perf/arch/arm/util/cs-etm.c +++ b/tools/perf/arch/arm/util/cs-etm.c @@ -204,8 +204,8 @@ static int cs_etm_set_option(struct auxtrace_record *itr, /* Set option of each CPU we have */ for (i = 0; i < cpu__max_cpu(); i++) { - if (!cpu_map__has(event_cpus, i) || - !cpu_map__has(online_cpus, i)) + if (!perf_cpu_map__has(event_cpus, i) || + !perf_cpu_map__has(online_cpus, i)) continue; if (option & BIT(ETM_OPT_CTXTID)) { @@ -523,8 +523,8 @@ cs_etm_info_priv_size(struct auxtrace_record *itr __maybe_unused, /* cpu map is not empty, we have specific CPUs to work with */ if (!perf_cpu_map__empty(event_cpus)) { for (i = 0; i < cpu__max_cpu(); i++) { - if (!cpu_map__has(event_cpus, i) || - !cpu_map__has(online_cpus, i)) + if (!perf_cpu_map__has(event_cpus, i) || + !perf_cpu_map__has(online_cpus, i)) continue; if (cs_etm_is_ete(itr, i)) @@ -537,7 +537,7 @@ cs_etm_info_priv_size(struct auxtrace_record *itr __maybe_unused, } else { /* get configuration for all CPUs in the system */ for (i = 0; i < cpu__max_cpu(); i++) { - if (!cpu_map__has(online_cpus, i)) + if (!perf_cpu_map__has(online_cpus, i)) continue; if (cs_etm_is_ete(itr, i)) @@ -722,8 +722,8 @@ static int cs_etm_info_fill(struct auxtrace_record *itr, } else { /* Make sure all specified CPUs are online */ for (i = 0; i < perf_cpu_map__nr(event_cpus); i++) { - if (cpu_map__has(event_cpus, i) && - !cpu_map__has(online_cpus, i)) + if (perf_cpu_map__has(event_cpus, i) && + !perf_cpu_map__has(online_cpus, i)) return -EINVAL; } @@ -744,7 +744,7 @@ static int cs_etm_info_fill(struct auxtrace_record *itr, offset = CS_ETM_SNAPSHOT + 1; for (i = 0; i < cpu__max_cpu() && offset < priv_size; i++) - if (cpu_map__has(cpu_map, i)) + if (perf_cpu_map__has(cpu_map, i)) cs_etm_get_metadata(i, &offset, itr, info); perf_cpu_map__put(online_cpus); diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c index 4527f632ebe4..9da1da4749c9 100644 --- a/tools/perf/builtin-sched.c +++ b/tools/perf/builtin-sched.c @@ -1617,10 +1617,10 @@ static int map_switch_event(struct perf_sched *sched, struct evsel *evsel, if (curr_thread && thread__has_color(curr_thread)) pid_color = COLOR_PIDS; - if (sched->map.cpus && !cpu_map__has(sched->map.cpus, cpu)) + if (sched->map.cpus && !perf_cpu_map__has(sched->map.cpus, cpu)) continue; - if (sched->map.color_cpus && cpu_map__has(sched->map.color_cpus, cpu)) + if (sched->map.color_cpus && perf_cpu_map__has(sched->map.color_cpus, cpu)) cpu_color = COLOR_CPUS; if (cpu != this_cpu) @@ -1639,7 +1639,7 @@ static int map_switch_event(struct perf_sched *sched, struct evsel *evsel, color_fprintf(stdout, color, " "); } - if (sched->map.cpus && !cpu_map__has(sched->map.cpus, this_cpu)) + if (sched->map.cpus && !perf_cpu_map__has(sched->map.cpus, this_cpu)) goto out; timestamp__scnprintf_usec(timestamp, stimestamp, sizeof(stimestamp)); diff --git a/tools/perf/tests/topology.c b/tools/perf/tests/topology.c index 0cb7b015b4b9..cb29ea7ec409 100644 --- a/tools/perf/tests/topology.c +++ b/tools/perf/tests/topology.c @@ -112,7 +112,7 @@ static int check_cpu_topology(char *path, struct perf_cpu_map *map) TEST_ASSERT_VAL("Session header CPU map not set", session->header.env.cpu); for (i = 0; i < session->header.env.nr_cpus_avail; i++) { - if (!cpu_map__has(map, i)) + if (!perf_cpu_map__has(map, i)) continue; pr_debug("CPU %d, core %d, socket %d\n", i, session->header.env.cpu[i].core_id, diff --git a/tools/perf/util/cpumap.c b/tools/perf/util/cpumap.c index 19e502cc65e7..f1d76a8e92e8 100644 --- a/tools/perf/util/cpumap.c +++ b/tools/perf/util/cpumap.c @@ -463,11 +463,6 @@ int cpu__setup_cpunode_map(void) return 0; } -bool cpu_map__has(struct perf_cpu_map *cpus, int cpu) -{ - return perf_cpu_map__idx(cpus, cpu) != -1; -} - size_t cpu_map__snprint(struct perf_cpu_map *map, char *buf, size_t size) { int i, cpu, start = -1; diff --git a/tools/perf/util/cpumap.h b/tools/perf/util/cpumap.h index ecd658293a2d..32b8b5178f01 100644 --- a/tools/perf/util/cpumap.h +++ b/tools/perf/util/cpumap.h @@ -78,8 +78,6 @@ struct cpu_aggr_map *cpu_aggr_map__new(const struct perf_cpu_map *cpus, aggr_cpu_id_get_t get_id, void *data); -bool cpu_map__has(struct perf_cpu_map *cpus, int cpu); - bool aggr_cpu_id__equal(const struct aggr_cpu_id *a, const struct aggr_cpu_id *b); bool aggr_cpu_id__is_empty(const struct aggr_cpu_id *a); struct aggr_cpu_id aggr_cpu_id__empty(void); diff --git a/tools/perf/util/cputopo.c b/tools/perf/util/cputopo.c index 51b429c86f98..8affb37d90e7 100644 --- a/tools/perf/util/cputopo.c +++ b/tools/perf/util/cputopo.c @@ -218,7 +218,7 @@ struct cpu_topology *cpu_topology__new(void) tp->core_cpus_list = addr; for (i = 0; i < nr; i++) { - if (!cpu_map__has(map, i)) + if (!perf_cpu_map__has(map, i)) continue; ret = build_cpu_topology(tp, i); From 92aad5c33f531187cc6013c8e51620212cdfefe1 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Tue, 4 Jan 2022 22:13:24 -0800 Subject: [PATCH 353/493] perf cpumap: Add some comments to cpu_aggr_map Move cpu_aggr_map__empty_new() to be with other cpu_aggr_map function. Reviewed-by: James Clark Signed-off-by: Ian Rogers Cc: Alexander Shishkin Cc: Andi Kleen Cc: Ingo Molnar Cc: Jiri Olsa Cc: John Garry Cc: Kajol Jain Cc: Kan Liang Cc: Leo Yan Cc: Mark Rutland Cc: Mathieu Poirier Cc: Mike Leach Cc: Namhyung Kim Cc: Paul Clarke Cc: Peter Zijlstra Cc: Riccardo Mancini Cc: Stephane Eranian Cc: Suzuki Poulouse Cc: Vineet Singh Cc: coresight@lists.linaro.org Cc: linux-arm-kernel@lists.infradead.org Cc: zhengjun.xing@intel.com Link: https://lore.kernel.org/r/20220105061351.120843-22-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/cpumap.h | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/tools/perf/util/cpumap.h b/tools/perf/util/cpumap.h index 32b8b5178f01..25a08d640d81 100644 --- a/tools/perf/util/cpumap.h +++ b/tools/perf/util/cpumap.h @@ -24,16 +24,18 @@ struct aggr_cpu_id { int core; }; +/** A collection of aggr_cpu_id values, the "built" version is sorted and uniqued. */ struct cpu_aggr_map { refcount_t refcnt; + /** Number of valid entries. */ int nr; + /** The entries. */ struct aggr_cpu_id map[]; }; struct perf_record_cpu_map_data; struct perf_cpu_map *perf_cpu_map__empty_new(int nr); -struct cpu_aggr_map *cpu_aggr_map__empty_new(int nr); struct perf_cpu_map *cpu_map__new_data(struct perf_record_cpu_map_data *data); size_t cpu_map__snprint(struct perf_cpu_map *map, char *buf, size_t size); @@ -67,6 +69,12 @@ int cpu__get_die_id(int cpu); */ int cpu__get_core_id(int cpu); +/** + * cpu_aggr_map__empty_new - Create a cpu_aggr_map of size nr with every entry + * being empty. + */ +struct cpu_aggr_map *cpu_aggr_map__empty_new(int nr); + typedef struct aggr_cpu_id (*aggr_cpu_id_get_t)(int cpu, void *data); /** From bd26bddfd93688d10984251249b84e1f6d91de27 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Tue, 4 Jan 2022 22:13:25 -0800 Subject: [PATCH 354/493] perf cpumap: Trim the cpu_aggr_map cpu_aggr_map__new() removes duplicates, when this happens shrink the array. Reviewed-by: James Clark Signed-off-by: Ian Rogers Cc: Alexander Shishkin Cc: Andi Kleen Cc: Ingo Molnar Cc: Jiri Olsa Cc: John Garry Cc: Kajol Jain Cc: Kan Liang Cc: Leo Yan Cc: Mark Rutland Cc: Mathieu Poirier Cc: Mike Leach Cc: Namhyung Kim Cc: Paul Clarke Cc: Peter Zijlstra Cc: Riccardo Mancini Cc: Stephane Eranian Cc: Suzuki Poulouse Cc: Vineet Singh Cc: coresight@lists.linaro.org Cc: linux-arm-kernel@lists.infradead.org Cc: zhengjun.xing@intel.com Link: https://lore.kernel.org/r/20220105061351.120843-23-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/cpumap.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/tools/perf/util/cpumap.c b/tools/perf/util/cpumap.c index f1d76a8e92e8..2779474f39db 100644 --- a/tools/perf/util/cpumap.c +++ b/tools/perf/util/cpumap.c @@ -185,7 +185,15 @@ struct cpu_aggr_map *cpu_aggr_map__new(const struct perf_cpu_map *cpus, c->nr++; } } + /* Trim. */ + if (c->nr != cpus->nr) { + struct cpu_aggr_map *trimmed_c = + realloc(c, + sizeof(struct cpu_aggr_map) + sizeof(struct aggr_cpu_id) * c->nr); + if (trimmed_c) + c = trimmed_c; + } /* ensure we process id in increasing order */ qsort(c->map, c->nr, sizeof(struct aggr_cpu_id), aggr_cpu_id__cmp); From f9e891ea172235f902972069b87be3bdc7c48f5a Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Tue, 4 Jan 2022 22:13:26 -0800 Subject: [PATCH 355/493] perf stat: Fix memory leak in check_per_pkg() If the key is already present then free the key used for lookup. Found with: $ perf stat -M IO_Read_BW /bin/true ==1749112==ERROR: LeakSanitizer: detected memory leaks Direct leak of 32 byte(s) in 4 object(s) allocated from: #0 0x7f6f6fa7d7cf in __interceptor_malloc ../../../../src/libsanitizer/asan/asan_malloc_linux.cpp:145 #1 0x55acecd9d7a6 in check_per_pkg util/stat.c:343 #2 0x55acecd9d9c5 in process_counter_values util/stat.c:365 #3 0x55acecd9e0ab in process_counter_maps util/stat.c:421 #4 0x55acecd9e292 in perf_stat_process_counter util/stat.c:443 #5 0x55aceca8553e in read_counters ./tools/perf/builtin-stat.c:470 #6 0x55aceca88fe3 in __run_perf_stat ./tools/perf/builtin-stat.c:1023 #7 0x55aceca89146 in run_perf_stat ./tools/perf/builtin-stat.c:1048 #8 0x55aceca90858 in cmd_stat ./tools/perf/builtin-stat.c:2555 #9 0x55acecc05fa5 in run_builtin ./tools/perf/perf.c:313 #10 0x55acecc064fe in handle_internal_command ./tools/perf/perf.c:365 #11 0x55acecc068bb in run_argv ./tools/perf/perf.c:409 #12 0x55acecc070aa in main ./tools/perf/perf.c:539 Reviewed-by: James Clark Signed-off-by: Ian Rogers Cc: Alexander Shishkin Cc: Andi Kleen Cc: Ingo Molnar Cc: Jiri Olsa Cc: John Garry Cc: Kajol Jain Cc: Kan Liang Cc: Leo Yan Cc: Mark Rutland Cc: Mathieu Poirier Cc: Mike Leach Cc: Namhyung Kim Cc: Paul Clarke Cc: Peter Zijlstra Cc: Riccardo Mancini Cc: Stephane Eranian Cc: Suzuki Poulouse Cc: Vineet Singh Cc: coresight@lists.linaro.org Cc: linux-arm-kernel@lists.infradead.org Cc: zhengjun.xing@intel.com Link: https://lore.kernel.org/r/20220105061351.120843-24-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/stat.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tools/perf/util/stat.c b/tools/perf/util/stat.c index 5c24aca0968c..c69b221f5e3e 100644 --- a/tools/perf/util/stat.c +++ b/tools/perf/util/stat.c @@ -345,9 +345,10 @@ static int check_per_pkg(struct evsel *counter, return -ENOMEM; *key = (uint64_t)d << 32 | s; - if (hashmap__find(mask, (void *)key, NULL)) + if (hashmap__find(mask, (void *)key, NULL)) { *skip = true; - else + free(key); + } else ret = hashmap__add(mask, (void *)key, (void *)1); return ret; From 34794913e2dc08a464499f795073a021feeb3b47 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Tue, 4 Jan 2022 22:13:27 -0800 Subject: [PATCH 356/493] perf cpumap: Add CPU to aggr_cpu_id With no aggregration, such as 'perf stat -A', the aggr_cpu_id lacks a way to describe per CPU aggregation and the core is set to the CPU in places like print_counter_aggrdata in stat-display.c. Setting the core to the CPU is undesirable as the CPU will exceed valid core values and lead to confusion. Add a CPU variable to address this. Signed-off-by: Ian Rogers Cc: Alexander Shishkin Cc: Andi Kleen Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: John Garry Cc: Kajol Jain Cc: Kan Liang Cc: Leo Yan Cc: Mark Rutland Cc: Mathieu Poirier Cc: Mike Leach Cc: Namhyung Kim Cc: Paul Clarke Cc: Peter Zijlstra Cc: Riccardo Mancini Cc: Stephane Eranian Cc: Suzuki Poulouse Cc: Vineet Singh Cc: coresight@lists.linaro.org Cc: linux-arm-kernel@lists.infradead.org Cc: zhengjun.xing@intel.com Link: https://lore.kernel.org/r/20220105061351.120843-25-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/topology.c | 19 +++++++++++++++++++ tools/perf/util/cpumap.c | 25 +++++++++++++++++++++---- tools/perf/util/cpumap.h | 8 ++++++++ 3 files changed, 48 insertions(+), 4 deletions(-) diff --git a/tools/perf/tests/topology.c b/tools/perf/tests/topology.c index cb29ea7ec409..33e4cb81265c 100644 --- a/tools/perf/tests/topology.c +++ b/tools/perf/tests/topology.c @@ -119,6 +119,22 @@ static int check_cpu_topology(char *path, struct perf_cpu_map *map) session->header.env.cpu[i].socket_id); } + // Test that CPU ID contains socket, die, core and CPU + for (i = 0; i < map->nr; i++) { + id = aggr_cpu_id__cpu(perf_cpu_map__cpu(map, i), NULL); + TEST_ASSERT_VAL("Cpu map - CPU ID doesn't match", map->map[i] == id.cpu); + + TEST_ASSERT_VAL("Cpu map - Core ID doesn't match", + session->header.env.cpu[map->map[i]].core_id == id.core); + TEST_ASSERT_VAL("Cpu map - Socket ID doesn't match", + session->header.env.cpu[map->map[i]].socket_id == id.socket); + + TEST_ASSERT_VAL("Cpu map - Die ID doesn't match", + session->header.env.cpu[map->map[i]].die_id == id.die); + TEST_ASSERT_VAL("Cpu map - Node ID is set", id.node == -1); + TEST_ASSERT_VAL("Cpu map - Thread is set", id.thread == -1); + } + // Test that core ID contains socket, die and core for (i = 0; i < map->nr; i++) { id = aggr_cpu_id__core(perf_cpu_map__cpu(map, i), NULL); @@ -145,6 +161,7 @@ static int check_cpu_topology(char *path, struct perf_cpu_map *map) TEST_ASSERT_VAL("Die map - Node ID is set", id.node == -1); TEST_ASSERT_VAL("Die map - Core is set", id.core == -1); + TEST_ASSERT_VAL("Die map - CPU is set", id.cpu == -1); TEST_ASSERT_VAL("Die map - Thread is set", id.thread == -1); } @@ -157,6 +174,7 @@ static int check_cpu_topology(char *path, struct perf_cpu_map *map) TEST_ASSERT_VAL("Socket map - Node ID is set", id.node == -1); TEST_ASSERT_VAL("Socket map - Die ID is set", id.die == -1); TEST_ASSERT_VAL("Socket map - Core is set", id.core == -1); + TEST_ASSERT_VAL("Socket map - CPU is set", id.cpu == -1); TEST_ASSERT_VAL("Socket map - Thread is set", id.thread == -1); } @@ -168,6 +186,7 @@ static int check_cpu_topology(char *path, struct perf_cpu_map *map) TEST_ASSERT_VAL("Node map - Socket is set", id.socket == -1); TEST_ASSERT_VAL("Node map - Die ID is set", id.die == -1); TEST_ASSERT_VAL("Node map - Core is set", id.core == -1); + TEST_ASSERT_VAL("Node map - CPU is set", id.cpu == -1); TEST_ASSERT_VAL("Node map - Thread is set", id.thread == -1); } perf_session__delete(session); diff --git a/tools/perf/util/cpumap.c b/tools/perf/util/cpumap.c index 2779474f39db..48ce583af0ec 100644 --- a/tools/perf/util/cpumap.c +++ b/tools/perf/util/cpumap.c @@ -242,7 +242,7 @@ struct aggr_cpu_id aggr_cpu_id__core(int cpu, void *data) struct aggr_cpu_id id; int core = cpu__get_core_id(cpu); - /* aggr_cpu_id__die returns a struct with socket and die set*/ + /* aggr_cpu_id__die returns a struct with socket and die set. */ id = aggr_cpu_id__die(cpu, data); if (aggr_cpu_id__is_empty(&id)) return id; @@ -256,6 +256,20 @@ struct aggr_cpu_id aggr_cpu_id__core(int cpu, void *data) } +struct aggr_cpu_id aggr_cpu_id__cpu(int cpu, void *data) +{ + struct aggr_cpu_id id; + + /* aggr_cpu_id__core returns a struct with socket, die and core set. */ + id = aggr_cpu_id__core(cpu, data); + if (aggr_cpu_id__is_empty(&id)) + return id; + + id.cpu = cpu; + return id; + +} + struct aggr_cpu_id aggr_cpu_id__node(int cpu, void *data __maybe_unused) { struct aggr_cpu_id id = aggr_cpu_id__empty(); @@ -579,7 +593,8 @@ bool aggr_cpu_id__equal(const struct aggr_cpu_id *a, const struct aggr_cpu_id *b a->node == b->node && a->socket == b->socket && a->die == b->die && - a->core == b->core; + a->core == b->core && + a->cpu == b->cpu; } bool aggr_cpu_id__is_empty(const struct aggr_cpu_id *a) @@ -588,7 +603,8 @@ bool aggr_cpu_id__is_empty(const struct aggr_cpu_id *a) a->node == -1 && a->socket == -1 && a->die == -1 && - a->core == -1; + a->core == -1 && + a->cpu == -1; } struct aggr_cpu_id aggr_cpu_id__empty(void) @@ -598,7 +614,8 @@ struct aggr_cpu_id aggr_cpu_id__empty(void) .node = -1, .socket = -1, .die = -1, - .core = -1 + .core = -1, + .cpu = -1 }; return ret; } diff --git a/tools/perf/util/cpumap.h b/tools/perf/util/cpumap.h index 25a08d640d81..b98cd1739677 100644 --- a/tools/perf/util/cpumap.h +++ b/tools/perf/util/cpumap.h @@ -22,6 +22,8 @@ struct aggr_cpu_id { int die; /** The core id as read from /sys/devices/system/cpu/cpuX/topology/core_id. */ int core; + /** CPU aggregation, note there is one CPU for each SMT thread. */ + int cpu; }; /** A collection of aggr_cpu_id values, the "built" version is sorted and uniqued. */ @@ -109,6 +111,12 @@ struct aggr_cpu_id aggr_cpu_id__die(int cpu, void *data); * compatible with aggr_cpu_id_get_t. */ struct aggr_cpu_id aggr_cpu_id__core(int cpu, void *data); +/** + * aggr_cpu_id__core - Create an aggr_cpu_id with the cpu, core, die and socket + * populated with the cpu, core, die and socket for cpu. The function signature + * is compatible with aggr_cpu_id_get_t. + */ +struct aggr_cpu_id aggr_cpu_id__cpu(int cpu, void *data); /** * aggr_cpu_id__node - Create an aggr_cpu_id with the numa node populated for * cpu. The function signature is compatible with aggr_cpu_id_get_t. From 7365f105e37429d28757f7f68d4850723ce18aa1 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Tue, 4 Jan 2022 22:13:28 -0800 Subject: [PATCH 357/493] perf stat-display: Avoid use of core for CPU Correct use of cpumap index in print_no_aggr_metric(). Signed-off-by: Ian Rogers Cc: Alexander Shishkin Cc: Andi Kleen Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: John Garry Cc: Kajol Jain Cc: Kan Liang Cc: Leo Yan Cc: Mark Rutland Cc: Mathieu Poirier Cc: Mike Leach Cc: Namhyung Kim Cc: Paul Clarke Cc: Peter Zijlstra Cc: Riccardo Mancini Cc: Stephane Eranian Cc: Suzuki Poulouse Cc: Vineet Singh Cc: coresight@lists.linaro.org Cc: linux-arm-kernel@lists.infradead.org Cc: zhengjun.xing@intel.com Link: https://lore.kernel.org/r/20220105061351.120843-26-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/stat-display.c | 45 +++++++++++++++++----------------- 1 file changed, 22 insertions(+), 23 deletions(-) diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c index 870b1db71fbc..f48d1678861c 100644 --- a/tools/perf/util/stat-display.c +++ b/tools/perf/util/stat-display.c @@ -120,11 +120,10 @@ static void aggr_printout(struct perf_stat_config *config, id.die, config->csv_output ? 0 : -3, id.core, config->csv_sep); - } else if (id.core > -1) { + } else if (id.cpu > -1) { fprintf(config->output, "CPU%*d%s", config->csv_output ? 0 : -7, - evsel__cpus(evsel)->map[id.core], - config->csv_sep); + id.cpu, config->csv_sep); } break; case AGGR_THREAD: @@ -334,7 +333,7 @@ static int first_shadow_cpu(struct perf_stat_config *config, int cpu, idx; if (config->aggr_mode == AGGR_NONE) - return id->core; + return id->cpu; if (!config->aggr_get_id) return 0; @@ -697,10 +696,9 @@ static void print_counter_aggrdata(struct perf_stat_config *config, fprintf(output, "%s", prefix); uval = val * counter->scale; - if (cpu != -1) { - id = aggr_cpu_id__empty(); - id.core = cpu; - } + if (cpu != -1) + id = aggr_cpu_id__cpu(cpu, /*data=*/NULL); + printout(config, id, nr, counter, uval, prefix, run, ena, 1.0, &rt_stat); if (!metric_only) @@ -911,8 +909,7 @@ static void print_counter(struct perf_stat_config *config, fprintf(output, "%s", prefix); uval = val * counter->scale; - id = aggr_cpu_id__empty(); - id.core = cpu; + id = aggr_cpu_id__cpu(cpu, /*data=*/NULL); printout(config, id, 0, counter, uval, prefix, run, ena, 1.0, &rt_stat); @@ -924,29 +921,31 @@ static void print_no_aggr_metric(struct perf_stat_config *config, struct evlist *evlist, char *prefix) { - int cpu; - int nrcpus = 0; - struct evsel *counter; - u64 ena, run, val; - double uval; - struct aggr_cpu_id id; + int all_idx, cpu; - nrcpus = evlist->core.cpus->nr; - for (cpu = 0; cpu < nrcpus; cpu++) { + perf_cpu_map__for_each_cpu(cpu, all_idx, evlist->core.cpus) { + struct evsel *counter; bool first = true; if (prefix) fputs(prefix, config->output); evlist__for_each_entry(evlist, counter) { - id = aggr_cpu_id__empty(); - id.core = cpu; + u64 ena, run, val; + double uval; + struct aggr_cpu_id id; + int counter_idx = perf_cpu_map__idx(evsel__cpus(counter), cpu); + + if (counter_idx < 0) + continue; + + id = aggr_cpu_id__cpu(cpu, /*data=*/NULL); if (first) { aggr_printout(config, counter, id, 0); first = false; } - val = perf_counts(counter->counts, cpu, 0)->val; - ena = perf_counts(counter->counts, cpu, 0)->ena; - run = perf_counts(counter->counts, cpu, 0)->run; + val = perf_counts(counter->counts, counter_idx, 0)->val; + ena = perf_counts(counter->counts, counter_idx, 0)->ena; + run = perf_counts(counter->counts, counter_idx, 0)->run; uval = val * counter->scale; printout(config, id, 0, counter, uval, prefix, From 2ca0a3718da24953689b1771589ac63b60f17358 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Tue, 4 Jan 2022 22:13:29 -0800 Subject: [PATCH 358/493] perf evsel: Derive CPUs and threads in alloc_counts Passing the number of CPUs and threads allows for an evsel's counts to be mismatched to its cpu map. To avoid this always derive the counts size from the cpu map. Change openat-syscall-all-cpus to set the cpus to allow for this to work. Signed-off-by: Ian Rogers Cc: Alexander Shishkin Cc: Andi Kleen Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: John Garry Cc: Kajol Jain Cc: Kan Liang Cc: Leo Yan Cc: Mark Rutland Cc: Mathieu Poirier Cc: Mike Leach Cc: Namhyung Kim Cc: Paul Clarke Cc: Peter Zijlstra Cc: Riccardo Mancini Cc: Stephane Eranian Cc: Suzuki Poulouse Cc: Vineet Singh Cc: coresight@lists.linaro.org Cc: linux-arm-kernel@lists.infradead.org Cc: zhengjun.xing@intel.com Link: https://lore.kernel.org/r/20220105061351.120843-27-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/openat-syscall-all-cpus.c | 10 +--------- tools/perf/util/counts.c | 8 ++++++-- tools/perf/util/counts.h | 2 +- tools/perf/util/evsel.c | 2 +- tools/perf/util/stat.c | 13 ++++++------- 5 files changed, 15 insertions(+), 20 deletions(-) diff --git a/tools/perf/tests/openat-syscall-all-cpus.c b/tools/perf/tests/openat-syscall-all-cpus.c index cd3dd463783f..544db0839b3b 100644 --- a/tools/perf/tests/openat-syscall-all-cpus.c +++ b/tools/perf/tests/openat-syscall-all-cpus.c @@ -85,15 +85,7 @@ static int test__openat_syscall_event_on_all_cpus(struct test_suite *test __mayb CPU_CLR(cpus->map[cpu], &cpu_set); } - /* - * Here we need to explicitly preallocate the counts, as if - * we use the auto allocation it will allocate just for 1 cpu, - * as we start by cpu 0. - */ - if (evsel__alloc_counts(evsel, cpus->nr, 1) < 0) { - pr_debug("evsel__alloc_counts(ncpus=%d)\n", cpus->nr); - goto out_close_fd; - } + evsel->core.cpus = perf_cpu_map__get(cpus); err = 0; diff --git a/tools/perf/util/counts.c b/tools/perf/util/counts.c index 582f3aeaf5e4..2b81707b9dba 100644 --- a/tools/perf/util/counts.c +++ b/tools/perf/util/counts.c @@ -4,6 +4,7 @@ #include #include "evsel.h" #include "counts.h" +#include #include struct perf_counts *perf_counts__new(int ncpus, int nthreads) @@ -55,9 +56,12 @@ void evsel__reset_counts(struct evsel *evsel) perf_counts__reset(evsel->counts); } -int evsel__alloc_counts(struct evsel *evsel, int ncpus, int nthreads) +int evsel__alloc_counts(struct evsel *evsel) { - evsel->counts = perf_counts__new(ncpus, nthreads); + struct perf_cpu_map *cpus = evsel__cpus(evsel); + int nthreads = perf_thread_map__nr(evsel->core.threads); + + evsel->counts = perf_counts__new(cpus ? cpus->nr : 1, nthreads); return evsel->counts != NULL ? 0 : -ENOMEM; } diff --git a/tools/perf/util/counts.h b/tools/perf/util/counts.h index 7ff36bf6d644..3e275e9c60d1 100644 --- a/tools/perf/util/counts.h +++ b/tools/perf/util/counts.h @@ -40,7 +40,7 @@ void perf_counts__delete(struct perf_counts *counts); void perf_counts__reset(struct perf_counts *counts); void evsel__reset_counts(struct evsel *evsel); -int evsel__alloc_counts(struct evsel *evsel, int ncpus, int nthreads); +int evsel__alloc_counts(struct evsel *evsel); void evsel__free_counts(struct evsel *evsel); #endif /* __PERF_COUNTS_H */ diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index a0acf53a2510..2de569a1a272 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -1578,7 +1578,7 @@ int __evsel__read_on_cpu(struct evsel *evsel, int cpu, int thread, bool scale) if (FD(evsel, cpu, thread) < 0) return -EINVAL; - if (evsel->counts == NULL && evsel__alloc_counts(evsel, cpu + 1, thread + 1) < 0) + if (evsel->counts == NULL && evsel__alloc_counts(evsel) < 0) return -ENOMEM; if (readn(FD(evsel, cpu, thread), &count, nv * sizeof(u64)) <= 0) diff --git a/tools/perf/util/stat.c b/tools/perf/util/stat.c index c69b221f5e3e..995cb5003133 100644 --- a/tools/perf/util/stat.c +++ b/tools/perf/util/stat.c @@ -152,11 +152,13 @@ static void evsel__free_stat_priv(struct evsel *evsel) zfree(&evsel->stats); } -static int evsel__alloc_prev_raw_counts(struct evsel *evsel, int ncpus, int nthreads) +static int evsel__alloc_prev_raw_counts(struct evsel *evsel) { + int cpu_map_nr = evsel__nr_cpus(evsel); + int nthreads = perf_thread_map__nr(evsel->core.threads); struct perf_counts *counts; - counts = perf_counts__new(ncpus, nthreads); + counts = perf_counts__new(cpu_map_nr, nthreads); if (counts) evsel->prev_raw_counts = counts; @@ -177,12 +179,9 @@ static void evsel__reset_prev_raw_counts(struct evsel *evsel) static int evsel__alloc_stats(struct evsel *evsel, bool alloc_raw) { - int ncpus = evsel__nr_cpus(evsel); - int nthreads = perf_thread_map__nr(evsel->core.threads); - if (evsel__alloc_stat_priv(evsel) < 0 || - evsel__alloc_counts(evsel, ncpus, nthreads) < 0 || - (alloc_raw && evsel__alloc_prev_raw_counts(evsel, ncpus, nthreads) < 0)) + evsel__alloc_counts(evsel) < 0 || + (alloc_raw && evsel__alloc_prev_raw_counts(evsel) < 0)) return -ENOMEM; return 0; From 7e3d1784c8a4d9c643a6ed0a2c44ee94dee8f7a6 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Tue, 4 Jan 2022 22:13:30 -0800 Subject: [PATCH 359/493] libperf: Switch cpu to more accurate cpu_map_idx Modify variable names and adopt perf_cpu_map__for_each_cpu() in perf_evsel__open(). Renaming is done by looking for consistency in API usage. Signed-off-by: Ian Rogers Cc: Alexander Shishkin Cc: Andi Kleen Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: John Garry Cc: Kajol Jain Cc: Kan Liang Cc: Leo Yan Cc: Mark Rutland Cc: Mathieu Poirier Cc: Mike Leach Cc: Namhyung Kim Cc: Paul Clarke Cc: Peter Zijlstra Cc: Riccardo Mancini Cc: Stephane Eranian Cc: Suzuki Poulouse Cc: Vineet Singh Cc: coresight@lists.linaro.org Cc: linux-arm-kernel@lists.infradead.org Cc: zhengjun.xing@intel.com Link: https://lore.kernel.org/r/20220105061351.120843-28-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/lib/perf/evsel.c | 88 +++++++++++++++-------------- tools/lib/perf/include/perf/evsel.h | 10 ++-- 2 files changed, 50 insertions(+), 48 deletions(-) diff --git a/tools/lib/perf/evsel.c b/tools/lib/perf/evsel.c index 68f83d2c27c1..8028b5a4da69 100644 --- a/tools/lib/perf/evsel.c +++ b/tools/lib/perf/evsel.c @@ -43,18 +43,22 @@ void perf_evsel__delete(struct perf_evsel *evsel) free(evsel); } -#define FD(e, x, y) ((int *) xyarray__entry(e->fd, x, y)) -#define MMAP(e, x, y) (e->mmap ? ((struct perf_mmap *) xyarray__entry(e->mmap, x, y)) : NULL) +#define FD(_evsel, _cpu_map_idx, _thread) \ + ((int *)xyarray__entry(_evsel->fd, _cpu_map_idx, _thread)) +#define MMAP(_evsel, _cpu_map_idx, _thread) \ + (_evsel->mmap ? ((struct perf_mmap *) xyarray__entry(_evsel->mmap, _cpu_map_idx, _thread)) \ + : NULL) int perf_evsel__alloc_fd(struct perf_evsel *evsel, int ncpus, int nthreads) { evsel->fd = xyarray__new(ncpus, nthreads, sizeof(int)); if (evsel->fd) { - int cpu, thread; - for (cpu = 0; cpu < ncpus; cpu++) { + int idx, thread; + + for (idx = 0; idx < ncpus; idx++) { for (thread = 0; thread < nthreads; thread++) { - int *fd = FD(evsel, cpu, thread); + int *fd = FD(evsel, idx, thread); if (fd) *fd = -1; @@ -80,7 +84,7 @@ sys_perf_event_open(struct perf_event_attr *attr, return syscall(__NR_perf_event_open, attr, pid, cpu, group_fd, flags); } -static int get_group_fd(struct perf_evsel *evsel, int cpu, int thread, int *group_fd) +static int get_group_fd(struct perf_evsel *evsel, int cpu_map_idx, int thread, int *group_fd) { struct perf_evsel *leader = evsel->leader; int *fd; @@ -97,7 +101,7 @@ static int get_group_fd(struct perf_evsel *evsel, int cpu, int thread, int *grou if (!leader->fd) return -ENOTCONN; - fd = FD(leader, cpu, thread); + fd = FD(leader, cpu_map_idx, thread); if (fd == NULL || *fd == -1) return -EBADF; @@ -109,7 +113,7 @@ static int get_group_fd(struct perf_evsel *evsel, int cpu, int thread, int *grou int perf_evsel__open(struct perf_evsel *evsel, struct perf_cpu_map *cpus, struct perf_thread_map *threads) { - int cpu, thread, err = 0; + int cpu, idx, thread, err = 0; if (cpus == NULL) { static struct perf_cpu_map *empty_cpu_map; @@ -139,21 +143,21 @@ int perf_evsel__open(struct perf_evsel *evsel, struct perf_cpu_map *cpus, perf_evsel__alloc_fd(evsel, cpus->nr, threads->nr) < 0) return -ENOMEM; - for (cpu = 0; cpu < cpus->nr; cpu++) { + perf_cpu_map__for_each_cpu(cpu, idx, cpus) { for (thread = 0; thread < threads->nr; thread++) { int fd, group_fd, *evsel_fd; - evsel_fd = FD(evsel, cpu, thread); + evsel_fd = FD(evsel, idx, thread); if (evsel_fd == NULL) return -EINVAL; - err = get_group_fd(evsel, cpu, thread, &group_fd); + err = get_group_fd(evsel, idx, thread, &group_fd); if (err < 0) return err; fd = sys_perf_event_open(&evsel->attr, threads->map[thread].pid, - cpus->map[cpu], group_fd, 0); + cpu, group_fd, 0); if (fd < 0) return -errno; @@ -165,12 +169,12 @@ int perf_evsel__open(struct perf_evsel *evsel, struct perf_cpu_map *cpus, return err; } -static void perf_evsel__close_fd_cpu(struct perf_evsel *evsel, int cpu) +static void perf_evsel__close_fd_cpu(struct perf_evsel *evsel, int cpu_map_idx) { int thread; for (thread = 0; thread < xyarray__max_y(evsel->fd); ++thread) { - int *fd = FD(evsel, cpu, thread); + int *fd = FD(evsel, cpu_map_idx, thread); if (fd && *fd >= 0) { close(*fd); @@ -181,10 +185,8 @@ static void perf_evsel__close_fd_cpu(struct perf_evsel *evsel, int cpu) void perf_evsel__close_fd(struct perf_evsel *evsel) { - int cpu; - - for (cpu = 0; cpu < xyarray__max_x(evsel->fd); cpu++) - perf_evsel__close_fd_cpu(evsel, cpu); + for (int idx = 0; idx < xyarray__max_x(evsel->fd); idx++) + perf_evsel__close_fd_cpu(evsel, idx); } void perf_evsel__free_fd(struct perf_evsel *evsel) @@ -202,29 +204,29 @@ void perf_evsel__close(struct perf_evsel *evsel) perf_evsel__free_fd(evsel); } -void perf_evsel__close_cpu(struct perf_evsel *evsel, int cpu) +void perf_evsel__close_cpu(struct perf_evsel *evsel, int cpu_map_idx) { if (evsel->fd == NULL) return; - perf_evsel__close_fd_cpu(evsel, cpu); + perf_evsel__close_fd_cpu(evsel, cpu_map_idx); } void perf_evsel__munmap(struct perf_evsel *evsel) { - int cpu, thread; + int idx, thread; if (evsel->fd == NULL || evsel->mmap == NULL) return; - for (cpu = 0; cpu < xyarray__max_x(evsel->fd); cpu++) { + for (idx = 0; idx < xyarray__max_x(evsel->fd); idx++) { for (thread = 0; thread < xyarray__max_y(evsel->fd); thread++) { - int *fd = FD(evsel, cpu, thread); + int *fd = FD(evsel, idx, thread); if (fd == NULL || *fd < 0) continue; - perf_mmap__munmap(MMAP(evsel, cpu, thread)); + perf_mmap__munmap(MMAP(evsel, idx, thread)); } } @@ -234,7 +236,7 @@ void perf_evsel__munmap(struct perf_evsel *evsel) int perf_evsel__mmap(struct perf_evsel *evsel, int pages) { - int ret, cpu, thread; + int ret, idx, thread; struct perf_mmap_param mp = { .prot = PROT_READ | PROT_WRITE, .mask = (pages * page_size) - 1, @@ -246,18 +248,18 @@ int perf_evsel__mmap(struct perf_evsel *evsel, int pages) if (perf_evsel__alloc_mmap(evsel, xyarray__max_x(evsel->fd), xyarray__max_y(evsel->fd)) < 0) return -ENOMEM; - for (cpu = 0; cpu < xyarray__max_x(evsel->fd); cpu++) { + for (idx = 0; idx < xyarray__max_x(evsel->fd); idx++) { for (thread = 0; thread < xyarray__max_y(evsel->fd); thread++) { - int *fd = FD(evsel, cpu, thread); + int *fd = FD(evsel, idx, thread); struct perf_mmap *map; if (fd == NULL || *fd < 0) continue; - map = MMAP(evsel, cpu, thread); + map = MMAP(evsel, idx, thread); perf_mmap__init(map, NULL, false, NULL); - ret = perf_mmap__mmap(map, &mp, *fd, cpu); + ret = perf_mmap__mmap(map, &mp, *fd, idx); if (ret) { perf_evsel__munmap(evsel); return ret; @@ -268,14 +270,14 @@ int perf_evsel__mmap(struct perf_evsel *evsel, int pages) return 0; } -void *perf_evsel__mmap_base(struct perf_evsel *evsel, int cpu, int thread) +void *perf_evsel__mmap_base(struct perf_evsel *evsel, int cpu_map_idx, int thread) { - int *fd = FD(evsel, cpu, thread); + int *fd = FD(evsel, cpu_map_idx, thread); - if (fd == NULL || *fd < 0 || MMAP(evsel, cpu, thread) == NULL) + if (fd == NULL || *fd < 0 || MMAP(evsel, cpu_map_idx, thread) == NULL) return NULL; - return MMAP(evsel, cpu, thread)->base; + return MMAP(evsel, cpu_map_idx, thread)->base; } int perf_evsel__read_size(struct perf_evsel *evsel) @@ -303,19 +305,19 @@ int perf_evsel__read_size(struct perf_evsel *evsel) return size; } -int perf_evsel__read(struct perf_evsel *evsel, int cpu, int thread, +int perf_evsel__read(struct perf_evsel *evsel, int cpu_map_idx, int thread, struct perf_counts_values *count) { size_t size = perf_evsel__read_size(evsel); - int *fd = FD(evsel, cpu, thread); + int *fd = FD(evsel, cpu_map_idx, thread); memset(count, 0, sizeof(*count)); if (fd == NULL || *fd < 0) return -EINVAL; - if (MMAP(evsel, cpu, thread) && - !perf_mmap__read_self(MMAP(evsel, cpu, thread), count)) + if (MMAP(evsel, cpu_map_idx, thread) && + !perf_mmap__read_self(MMAP(evsel, cpu_map_idx, thread), count)) return 0; if (readn(*fd, count->values, size) <= 0) @@ -326,13 +328,13 @@ int perf_evsel__read(struct perf_evsel *evsel, int cpu, int thread, static int perf_evsel__run_ioctl(struct perf_evsel *evsel, int ioc, void *arg, - int cpu) + int cpu_map_idx) { int thread; for (thread = 0; thread < xyarray__max_y(evsel->fd); thread++) { int err; - int *fd = FD(evsel, cpu, thread); + int *fd = FD(evsel, cpu_map_idx, thread); if (fd == NULL || *fd < 0) return -1; @@ -346,9 +348,9 @@ static int perf_evsel__run_ioctl(struct perf_evsel *evsel, return 0; } -int perf_evsel__enable_cpu(struct perf_evsel *evsel, int cpu) +int perf_evsel__enable_cpu(struct perf_evsel *evsel, int cpu_map_idx) { - return perf_evsel__run_ioctl(evsel, PERF_EVENT_IOC_ENABLE, NULL, cpu); + return perf_evsel__run_ioctl(evsel, PERF_EVENT_IOC_ENABLE, NULL, cpu_map_idx); } int perf_evsel__enable(struct perf_evsel *evsel) @@ -361,9 +363,9 @@ int perf_evsel__enable(struct perf_evsel *evsel) return err; } -int perf_evsel__disable_cpu(struct perf_evsel *evsel, int cpu) +int perf_evsel__disable_cpu(struct perf_evsel *evsel, int cpu_map_idx) { - return perf_evsel__run_ioctl(evsel, PERF_EVENT_IOC_DISABLE, NULL, cpu); + return perf_evsel__run_ioctl(evsel, PERF_EVENT_IOC_DISABLE, NULL, cpu_map_idx); } int perf_evsel__disable(struct perf_evsel *evsel) diff --git a/tools/lib/perf/include/perf/evsel.h b/tools/lib/perf/include/perf/evsel.h index f401c7484bec..2a9516b42d15 100644 --- a/tools/lib/perf/include/perf/evsel.h +++ b/tools/lib/perf/include/perf/evsel.h @@ -28,16 +28,16 @@ LIBPERF_API void perf_evsel__delete(struct perf_evsel *evsel); LIBPERF_API int perf_evsel__open(struct perf_evsel *evsel, struct perf_cpu_map *cpus, struct perf_thread_map *threads); LIBPERF_API void perf_evsel__close(struct perf_evsel *evsel); -LIBPERF_API void perf_evsel__close_cpu(struct perf_evsel *evsel, int cpu); +LIBPERF_API void perf_evsel__close_cpu(struct perf_evsel *evsel, int cpu_map_idx); LIBPERF_API int perf_evsel__mmap(struct perf_evsel *evsel, int pages); LIBPERF_API void perf_evsel__munmap(struct perf_evsel *evsel); -LIBPERF_API void *perf_evsel__mmap_base(struct perf_evsel *evsel, int cpu, int thread); -LIBPERF_API int perf_evsel__read(struct perf_evsel *evsel, int cpu, int thread, +LIBPERF_API void *perf_evsel__mmap_base(struct perf_evsel *evsel, int cpu_map_idx, int thread); +LIBPERF_API int perf_evsel__read(struct perf_evsel *evsel, int cpu_map_idx, int thread, struct perf_counts_values *count); LIBPERF_API int perf_evsel__enable(struct perf_evsel *evsel); -LIBPERF_API int perf_evsel__enable_cpu(struct perf_evsel *evsel, int cpu); +LIBPERF_API int perf_evsel__enable_cpu(struct perf_evsel *evsel, int cpu_map_idx); LIBPERF_API int perf_evsel__disable(struct perf_evsel *evsel); -LIBPERF_API int perf_evsel__disable_cpu(struct perf_evsel *evsel, int cpu); +LIBPERF_API int perf_evsel__disable_cpu(struct perf_evsel *evsel, int cpu_map_idx); LIBPERF_API struct perf_cpu_map *perf_evsel__cpus(struct perf_evsel *evsel); LIBPERF_API struct perf_thread_map *perf_evsel__threads(struct perf_evsel *evsel); LIBPERF_API struct perf_event_attr *perf_evsel__attr(struct perf_evsel *evsel); From 47ffe806674f67e729627edd689b10827b1790eb Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Tue, 4 Jan 2022 22:13:31 -0800 Subject: [PATCH 360/493] libperf: Use cpu not index for evsel mmap Fix issue where evsel's CPU map index was being used as the mmap cpu. Signed-off-by: Ian Rogers Cc: Alexander Shishkin Cc: Andi Kleen Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: John Garry Cc: Kajol Jain Cc: Kan Liang Cc: Leo Yan Cc: Mark Rutland Cc: Mathieu Poirier Cc: Mike Leach Cc: Namhyung Kim Cc: Paul Clarke Cc: Peter Zijlstra Cc: Riccardo Mancini Cc: Stephane Eranian Cc: Suzuki Poulouse Cc: Vineet Singh Cc: coresight@lists.linaro.org Cc: linux-arm-kernel@lists.infradead.org Cc: zhengjun.xing@intel.com Link: https://lore.kernel.org/r/20220105061351.120843-29-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/lib/perf/evsel.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tools/lib/perf/evsel.c b/tools/lib/perf/evsel.c index 8028b5a4da69..f1e1665ef4bd 100644 --- a/tools/lib/perf/evsel.c +++ b/tools/lib/perf/evsel.c @@ -252,6 +252,7 @@ int perf_evsel__mmap(struct perf_evsel *evsel, int pages) for (thread = 0; thread < xyarray__max_y(evsel->fd); thread++) { int *fd = FD(evsel, idx, thread); struct perf_mmap *map; + int cpu = perf_cpu_map__cpu(evsel->cpus, idx); if (fd == NULL || *fd < 0) continue; @@ -259,7 +260,7 @@ int perf_evsel__mmap(struct perf_evsel *evsel, int pages) map = MMAP(evsel, idx, thread); perf_mmap__init(map, NULL, false, NULL); - ret = perf_mmap__mmap(map, &mp, *fd, idx); + ret = perf_mmap__mmap(map, &mp, *fd, cpu); if (ret) { perf_evsel__munmap(evsel); return ret; From 7316268ff740c29dfb52649ff8074a5aa17ec0ce Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Tue, 4 Jan 2022 22:13:32 -0800 Subject: [PATCH 361/493] perf counts: Switch name cpu to cpu_map_idx Try to reduce confusion in particular when the cpu map doesn't contain an entry for every CPU. Signed-off-by: Ian Rogers Cc: Alexander Shishkin Cc: Andi Kleen Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: John Garry Cc: Kajol Jain Cc: Kan Liang Cc: Leo Yan Cc: Mark Rutland Cc: Mathieu Poirier Cc: Mike Leach Cc: Namhyung Kim Cc: Paul Clarke Cc: Peter Zijlstra Cc: Riccardo Mancini Cc: Stephane Eranian Cc: Suzuki Poulouse Cc: Vineet Singh Cc: coresight@lists.linaro.org Cc: linux-arm-kernel@lists.infradead.org Cc: zhengjun.xing@intel.com Link: https://lore.kernel.org/r/20220105061351.120843-30-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/counts.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/tools/perf/util/counts.h b/tools/perf/util/counts.h index 3e275e9c60d1..5de275194f2b 100644 --- a/tools/perf/util/counts.h +++ b/tools/perf/util/counts.h @@ -18,21 +18,21 @@ struct perf_counts { static inline struct perf_counts_values* -perf_counts(struct perf_counts *counts, int cpu, int thread) +perf_counts(struct perf_counts *counts, int cpu_map_idx, int thread) { - return xyarray__entry(counts->values, cpu, thread); + return xyarray__entry(counts->values, cpu_map_idx, thread); } static inline bool -perf_counts__is_loaded(struct perf_counts *counts, int cpu, int thread) +perf_counts__is_loaded(struct perf_counts *counts, int cpu_map_idx, int thread) { - return *((bool *) xyarray__entry(counts->loaded, cpu, thread)); + return *((bool *) xyarray__entry(counts->loaded, cpu_map_idx, thread)); } static inline void -perf_counts__set_loaded(struct perf_counts *counts, int cpu, int thread, bool loaded) +perf_counts__set_loaded(struct perf_counts *counts, int cpu_map_idx, int thread, bool loaded) { - *((bool *) xyarray__entry(counts->loaded, cpu, thread)) = loaded; + *((bool *) xyarray__entry(counts->loaded, cpu_map_idx, thread)) = loaded; } struct perf_counts *perf_counts__new(int ncpus, int nthreads); From ab90caa7b2d0b708cfee16b33325ca24de4d8f25 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Tue, 4 Jan 2022 22:13:33 -0800 Subject: [PATCH 362/493] perf stat: Rename aggr_data cpu to imply it's an index Trying to make cpu maps less error prone. Signed-off-by: Ian Rogers Cc: Alexander Shishkin Cc: Andi Kleen Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: John Garry Cc: Kajol Jain Cc: Kan Liang Cc: Leo Yan Cc: Mark Rutland Cc: Mathieu Poirier Cc: Mike Leach Cc: Namhyung Kim Cc: Paul Clarke Cc: Peter Zijlstra Cc: Riccardo Mancini Cc: Stephane Eranian Cc: Suzuki Poulouse Cc: Vineet Singh Cc: coresight@lists.linaro.org Cc: linux-arm-kernel@lists.infradead.org Cc: zhengjun.xing@intel.com Link: https://lore.kernel.org/r/20220105061351.120843-31-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/stat-display.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c index f48d1678861c..7e933a8fee68 100644 --- a/tools/perf/util/stat-display.c +++ b/tools/perf/util/stat-display.c @@ -626,7 +626,7 @@ struct aggr_data { u64 ena, run, val; struct aggr_cpu_id id; int nr; - int cpu; + int cpu_map_idx; }; static void aggr_cb(struct perf_stat_config *config, @@ -878,9 +878,9 @@ static void counter_cb(struct perf_stat_config *config __maybe_unused, { struct aggr_data *ad = data; - ad->val += perf_counts(counter->counts, ad->cpu, 0)->val; - ad->ena += perf_counts(counter->counts, ad->cpu, 0)->ena; - ad->run += perf_counts(counter->counts, ad->cpu, 0)->run; + ad->val += perf_counts(counter->counts, ad->cpu_map_idx, 0)->val; + ad->ena += perf_counts(counter->counts, ad->cpu_map_idx, 0)->ena; + ad->run += perf_counts(counter->counts, ad->cpu_map_idx, 0)->run; } /* @@ -897,7 +897,7 @@ static void print_counter(struct perf_stat_config *config, struct aggr_cpu_id id; for (cpu = 0; cpu < evsel__nr_cpus(counter); cpu++) { - struct aggr_data ad = { .cpu = cpu }; + struct aggr_data ad = { .cpu_map_idx = cpu }; if (!collect_data(config, counter, counter_cb, &ad)) return; From 7ea82fbee4598e51e8bf47566b252cd5745d5b17 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Tue, 4 Jan 2022 22:13:34 -0800 Subject: [PATCH 363/493] perf stat: Use perf_cpu_map__for_each_cpu() Correct in print_counter() where an index was being used as a cpu. Signed-off-by: Ian Rogers Cc: Alexander Shishkin Cc: Andi Kleen Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: John Garry Cc: Kajol Jain Cc: Kan Liang Cc: Leo Yan Cc: Mark Rutland Cc: Mathieu Poirier Cc: Mike Leach Cc: Namhyung Kim Cc: Paul Clarke Cc: Peter Zijlstra Cc: Riccardo Mancini Cc: Stephane Eranian Cc: Suzuki Poulouse Cc: Vineet Singh Cc: coresight@lists.linaro.org Cc: linux-arm-kernel@lists.infradead.org Cc: zhengjun.xing@intel.com Link: https://lore.kernel.org/r/20220105061351.120843-32-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/stat-display.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c index 7e933a8fee68..0f192360b6c6 100644 --- a/tools/perf/util/stat-display.c +++ b/tools/perf/util/stat-display.c @@ -4,6 +4,7 @@ #include #include #include +#include #include "color.h" #include "counts.h" #include "evlist.h" @@ -732,7 +733,7 @@ static void print_aggr(struct perf_stat_config *config, evlist__for_each_entry(evlist, counter) { print_counter_aggrdata(config, counter, s, prefix, metric_only, - &first, -1); + &first, /*cpu=*/-1); } if (metric_only) fputc('\n', output); @@ -893,11 +894,11 @@ static void print_counter(struct perf_stat_config *config, FILE *output = config->output; u64 ena, run, val; double uval; - int cpu; + int idx, cpu; struct aggr_cpu_id id; - for (cpu = 0; cpu < evsel__nr_cpus(counter); cpu++) { - struct aggr_data ad = { .cpu_map_idx = cpu }; + perf_cpu_map__for_each_cpu(cpu, idx, evsel__cpus(counter)) { + struct aggr_data ad = { .cpu_map_idx = idx }; if (!collect_data(config, counter, counter_cb, &ad)) return; @@ -1248,7 +1249,7 @@ static void print_percore(struct perf_stat_config *config, print_counter_aggrdata(config, counter, s, prefix, metric_only, - &first, -1); + &first, /*cpu=*/-1); } if (metric_only) From f9551b3f6249cfe8ea5b5f8716675ccf2f6ec737 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Tue, 4 Jan 2022 22:13:35 -0800 Subject: [PATCH 364/493] perf script: Use for each cpu to aid readability Use perf_cpu_map__for_each_cpu() to help with readability. Signed-off-by: Ian Rogers Cc: Alexander Shishkin Cc: Andi Kleen Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: John Garry Cc: Kajol Jain Cc: Kan Liang Cc: Leo Yan Cc: Mark Rutland Cc: Mathieu Poirier Cc: Mike Leach Cc: Namhyung Kim Cc: Paul Clarke Cc: Peter Zijlstra Cc: Riccardo Mancini Cc: Stephane Eranian Cc: Suzuki Poulouse Cc: Vineet Singh Cc: coresight@lists.linaro.org Cc: linux-arm-kernel@lists.infradead.org Cc: zhengjun.xing@intel.com Link: https://lore.kernel.org/r/20220105061351.120843-33-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-script.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index c9b3002ec254..f40319144856 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -2115,8 +2115,7 @@ static struct scripting_ops *scripting_ops; static void __process_stat(struct evsel *counter, u64 tstamp) { int nthreads = perf_thread_map__nr(counter->core.threads); - int ncpus = evsel__nr_cpus(counter); - int cpu, thread; + int idx, cpu, thread; static int header_printed; if (counter->core.system_wide) @@ -2129,13 +2128,13 @@ static void __process_stat(struct evsel *counter, u64 tstamp) } for (thread = 0; thread < nthreads; thread++) { - for (cpu = 0; cpu < ncpus; cpu++) { + perf_cpu_map__for_each_cpu(cpu, idx, evsel__cpus(counter)) { struct perf_counts_values *counts; counts = perf_counts(counter->counts, cpu, thread); printf("%3d %8d %15" PRIu64 " %15" PRIu64 " %15" PRIu64 " %15" PRIu64 " %s\n", - counter->core.cpus->map[cpu], + cpu, perf_thread_map__pid(counter->core.threads, thread), counts->val, counts->ena, From 80b82f3b65e94ba22d3f12a98f7ecc56cc14c903 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Tue, 4 Jan 2022 22:13:36 -0800 Subject: [PATCH 365/493] libperf: Allow NULL in perf_cpu_map__idx() Return -1, not found, if NULL is passed. Signed-off-by: Ian Rogers Cc: Alexander Shishkin Cc: Andi Kleen Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: John Garry Cc: Kajol Jain Cc: Kan Liang Cc: Leo Yan Cc: Mark Rutland Cc: Mathieu Poirier Cc: Mike Leach Cc: Namhyung Kim Cc: Paul Clarke Cc: Peter Zijlstra Cc: Riccardo Mancini Cc: Stephane Eranian Cc: Suzuki Poulouse Cc: Vineet Singh Cc: coresight@lists.linaro.org Cc: linux-arm-kernel@lists.infradead.org Cc: zhengjun.xing@intel.com Link: https://lore.kernel.org/r/20220105061351.120843-34-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/lib/perf/cpumap.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/tools/lib/perf/cpumap.c b/tools/lib/perf/cpumap.c index 3c36a06771af..eacea3ab965a 100644 --- a/tools/lib/perf/cpumap.c +++ b/tools/lib/perf/cpumap.c @@ -270,8 +270,13 @@ bool perf_cpu_map__empty(const struct perf_cpu_map *map) int perf_cpu_map__idx(const struct perf_cpu_map *cpus, int cpu) { - int low = 0, high = cpus->nr; + int low, high; + if (!cpus) + return -1; + + low = 0; + high = cpus->nr; while (low < high) { int idx = (low + high) / 2, cpu_at_idx = cpus->map[idx]; From 472832d2c000b9611feaea66fe521055c3dbf17a Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Tue, 4 Jan 2022 22:13:37 -0800 Subject: [PATCH 366/493] perf evlist: Refactor evlist__for_each_cpu() Previously evlist__for_each_cpu() needed to iterate over the evlist in an inner loop and call "skip" routines. Refactor this so that the iteratr is smarter and the next function can update both the current CPU and evsel. By using a cpu map index, fix apparent off-by-1 in __run_perf_stat's call to perf_evsel__close_cpu(). Signed-off-by: Ian Rogers Cc: Alexander Shishkin Cc: Andi Kleen Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: John Garry Cc: Kajol Jain Cc: Kan Liang Cc: Leo Yan Cc: Mark Rutland Cc: Mathieu Poirier Cc: Mike Leach Cc: Namhyung Kim Cc: Paul Clarke Cc: Peter Zijlstra Cc: Riccardo Mancini Cc: Stephane Eranian Cc: Suzuki Poulouse Cc: Vineet Singh Cc: coresight@lists.linaro.org Cc: linux-arm-kernel@lists.infradead.org Cc: zhengjun.xing@intel.com Link: https://lore.kernel.org/r/20220105061351.120843-35-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-stat.c | 183 ++++++++++++++++++-------------------- tools/perf/util/evlist.c | 148 ++++++++++++++++-------------- tools/perf/util/evlist.h | 50 +++++++++-- tools/perf/util/evsel.h | 1 - 4 files changed, 213 insertions(+), 169 deletions(-) diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index a518fcf0b3f8..f84116c9e016 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -405,36 +405,33 @@ static int read_counter_cpu(struct evsel *counter, struct timespec *rs, int cpu) static int read_affinity_counters(struct timespec *rs) { - struct evsel *counter; - struct affinity affinity; - int i, ncpus, cpu; + struct evlist_cpu_iterator evlist_cpu_itr; + struct affinity saved_affinity, *affinity; if (all_counters_use_bpf) return 0; - if (affinity__setup(&affinity) < 0) - return -1; - - ncpus = perf_cpu_map__nr(evsel_list->core.all_cpus); if (!target__has_cpu(&target) || target__has_per_thread(&target)) - ncpus = 1; - evlist__for_each_cpu(evsel_list, i, cpu) { - if (i >= ncpus) - break; - affinity__set(&affinity, cpu); + affinity = NULL; + else if (affinity__setup(&saved_affinity) < 0) + return -1; + else + affinity = &saved_affinity; - evlist__for_each_entry(evsel_list, counter) { - if (evsel__cpu_iter_skip(counter, cpu)) - continue; - if (evsel__is_bpf(counter)) - continue; - if (!counter->err) { - counter->err = read_counter_cpu(counter, rs, - counter->cpu_iter - 1); - } + evlist__for_each_cpu(evlist_cpu_itr, evsel_list, affinity) { + struct evsel *counter = evlist_cpu_itr.evsel; + + if (evsel__is_bpf(counter)) + continue; + + if (!counter->err) { + counter->err = read_counter_cpu(counter, rs, + evlist_cpu_itr.cpu_map_idx); } } - affinity__cleanup(&affinity); + if (affinity) + affinity__cleanup(&saved_affinity); + return 0; } @@ -788,8 +785,9 @@ static int __run_perf_stat(int argc, const char **argv, int run_idx) int status = 0; const bool forks = (argc > 0); bool is_pipe = STAT_RECORD ? perf_stat.data.is_pipe : false; + struct evlist_cpu_iterator evlist_cpu_itr; struct affinity affinity; - int i, cpu, err; + int err; bool second_pass = false; if (forks) { @@ -813,56 +811,53 @@ static int __run_perf_stat(int argc, const char **argv, int run_idx) all_counters_use_bpf = false; } - evlist__for_each_cpu (evsel_list, i, cpu) { + evlist__for_each_cpu(evlist_cpu_itr, evsel_list, &affinity) { + counter = evlist_cpu_itr.evsel; + /* * bperf calls evsel__open_per_cpu() in bperf__load(), so * no need to call it again here. */ if (target.use_bpf) break; - affinity__set(&affinity, cpu); - evlist__for_each_entry(evsel_list, counter) { - if (evsel__cpu_iter_skip(counter, cpu)) - continue; - if (counter->reset_group || counter->errored) - continue; - if (evsel__is_bpf(counter)) - continue; + if (counter->reset_group || counter->errored) + continue; + if (evsel__is_bpf(counter)) + continue; try_again: - if (create_perf_stat_counter(counter, &stat_config, &target, - counter->cpu_iter - 1) < 0) { - - /* - * Weak group failed. We cannot just undo this here - * because earlier CPUs might be in group mode, and the kernel - * doesn't support mixing group and non group reads. Defer - * it to later. - * Don't close here because we're in the wrong affinity. - */ - if ((errno == EINVAL || errno == EBADF) && - evsel__leader(counter) != counter && - counter->weak_group) { - evlist__reset_weak_group(evsel_list, counter, false); - assert(counter->reset_group); - second_pass = true; - continue; - } - - switch (stat_handle_error(counter)) { - case COUNTER_FATAL: - return -1; - case COUNTER_RETRY: - goto try_again; - case COUNTER_SKIP: - continue; - default: - break; - } + if (create_perf_stat_counter(counter, &stat_config, &target, + evlist_cpu_itr.cpu_map_idx) < 0) { + /* + * Weak group failed. We cannot just undo this here + * because earlier CPUs might be in group mode, and the kernel + * doesn't support mixing group and non group reads. Defer + * it to later. + * Don't close here because we're in the wrong affinity. + */ + if ((errno == EINVAL || errno == EBADF) && + evsel__leader(counter) != counter && + counter->weak_group) { + evlist__reset_weak_group(evsel_list, counter, false); + assert(counter->reset_group); + second_pass = true; + continue; } - counter->supported = true; + + switch (stat_handle_error(counter)) { + case COUNTER_FATAL: + return -1; + case COUNTER_RETRY: + goto try_again; + case COUNTER_SKIP: + continue; + default: + break; + } + } + counter->supported = true; } if (second_pass) { @@ -871,42 +866,40 @@ try_again: * and also close errored counters. */ - evlist__for_each_cpu(evsel_list, i, cpu) { - affinity__set(&affinity, cpu); - /* First close errored or weak retry */ - evlist__for_each_entry(evsel_list, counter) { - if (!counter->reset_group && !counter->errored) - continue; - if (evsel__cpu_iter_skip_no_inc(counter, cpu)) - continue; - perf_evsel__close_cpu(&counter->core, counter->cpu_iter); - } - /* Now reopen weak */ - evlist__for_each_entry(evsel_list, counter) { - if (!counter->reset_group && !counter->errored) - continue; - if (evsel__cpu_iter_skip(counter, cpu)) - continue; - if (!counter->reset_group) - continue; -try_again_reset: - pr_debug2("reopening weak %s\n", evsel__name(counter)); - if (create_perf_stat_counter(counter, &stat_config, &target, - counter->cpu_iter - 1) < 0) { + /* First close errored or weak retry */ + evlist__for_each_cpu(evlist_cpu_itr, evsel_list, &affinity) { + counter = evlist_cpu_itr.evsel; - switch (stat_handle_error(counter)) { - case COUNTER_FATAL: - return -1; - case COUNTER_RETRY: - goto try_again_reset; - case COUNTER_SKIP: - continue; - default: - break; - } + if (!counter->reset_group && !counter->errored) + continue; + + perf_evsel__close_cpu(&counter->core, evlist_cpu_itr.cpu_map_idx); + } + /* Now reopen weak */ + evlist__for_each_cpu(evlist_cpu_itr, evsel_list, &affinity) { + counter = evlist_cpu_itr.evsel; + + if (!counter->reset_group && !counter->errored) + continue; + if (!counter->reset_group) + continue; +try_again_reset: + pr_debug2("reopening weak %s\n", evsel__name(counter)); + if (create_perf_stat_counter(counter, &stat_config, &target, + evlist_cpu_itr.cpu_map_idx) < 0) { + + switch (stat_handle_error(counter)) { + case COUNTER_FATAL: + return -1; + case COUNTER_RETRY: + goto try_again_reset; + case COUNTER_SKIP: + continue; + default: + break; } - counter->supported = true; } + counter->supported = true; } } affinity__cleanup(&affinity); diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index 5f92319ce258..39d294f6c321 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -342,36 +342,65 @@ static int evlist__nr_threads(struct evlist *evlist, struct evsel *evsel) return perf_thread_map__nr(evlist->core.threads); } -void evlist__cpu_iter_start(struct evlist *evlist) +struct evlist_cpu_iterator evlist__cpu_begin(struct evlist *evlist, struct affinity *affinity) { - struct evsel *pos; + struct evlist_cpu_iterator itr = { + .container = evlist, + .evsel = evlist__first(evlist), + .cpu_map_idx = 0, + .evlist_cpu_map_idx = 0, + .evlist_cpu_map_nr = perf_cpu_map__nr(evlist->core.all_cpus), + .cpu = -1, + .affinity = affinity, + }; - /* - * Reset the per evsel cpu_iter. This is needed because - * each evsel's cpumap may have a different index space, - * and some operations need the index to modify - * the FD xyarray (e.g. open, close) - */ - evlist__for_each_entry(evlist, pos) - pos->cpu_iter = 0; -} - -bool evsel__cpu_iter_skip_no_inc(struct evsel *ev, int cpu) -{ - if (ev->cpu_iter >= ev->core.cpus->nr) - return true; - if (cpu >= 0 && ev->core.cpus->map[ev->cpu_iter] != cpu) - return true; - return false; -} - -bool evsel__cpu_iter_skip(struct evsel *ev, int cpu) -{ - if (!evsel__cpu_iter_skip_no_inc(ev, cpu)) { - ev->cpu_iter++; - return false; + if (itr.affinity) { + itr.cpu = perf_cpu_map__cpu(evlist->core.all_cpus, 0); + affinity__set(itr.affinity, itr.cpu); + itr.cpu_map_idx = perf_cpu_map__idx(itr.evsel->core.cpus, itr.cpu); + /* + * If this CPU isn't in the evsel's cpu map then advance through + * the list. + */ + if (itr.cpu_map_idx == -1) + evlist_cpu_iterator__next(&itr); } - return true; + return itr; +} + +void evlist_cpu_iterator__next(struct evlist_cpu_iterator *evlist_cpu_itr) +{ + while (evlist_cpu_itr->evsel != evlist__last(evlist_cpu_itr->container)) { + evlist_cpu_itr->evsel = evsel__next(evlist_cpu_itr->evsel); + evlist_cpu_itr->cpu_map_idx = + perf_cpu_map__idx(evlist_cpu_itr->evsel->core.cpus, + evlist_cpu_itr->cpu); + if (evlist_cpu_itr->cpu_map_idx != -1) + return; + } + evlist_cpu_itr->evlist_cpu_map_idx++; + if (evlist_cpu_itr->evlist_cpu_map_idx < evlist_cpu_itr->evlist_cpu_map_nr) { + evlist_cpu_itr->evsel = evlist__first(evlist_cpu_itr->container); + evlist_cpu_itr->cpu = + perf_cpu_map__cpu(evlist_cpu_itr->container->core.all_cpus, + evlist_cpu_itr->evlist_cpu_map_idx); + if (evlist_cpu_itr->affinity) + affinity__set(evlist_cpu_itr->affinity, evlist_cpu_itr->cpu); + evlist_cpu_itr->cpu_map_idx = + perf_cpu_map__idx(evlist_cpu_itr->evsel->core.cpus, + evlist_cpu_itr->cpu); + /* + * If this CPU isn't in the evsel's cpu map then advance through + * the list. + */ + if (evlist_cpu_itr->cpu_map_idx == -1) + evlist_cpu_iterator__next(evlist_cpu_itr); + } +} + +bool evlist_cpu_iterator__end(const struct evlist_cpu_iterator *evlist_cpu_itr) +{ + return evlist_cpu_itr->evlist_cpu_map_idx >= evlist_cpu_itr->evlist_cpu_map_nr; } static int evsel__strcmp(struct evsel *pos, char *evsel_name) @@ -400,31 +429,26 @@ static int evlist__is_enabled(struct evlist *evlist) static void __evlist__disable(struct evlist *evlist, char *evsel_name) { struct evsel *pos; + struct evlist_cpu_iterator evlist_cpu_itr; struct affinity affinity; - int cpu, i, imm = 0; bool has_imm = false; if (affinity__setup(&affinity) < 0) return; /* Disable 'immediate' events last */ - for (imm = 0; imm <= 1; imm++) { - evlist__for_each_cpu(evlist, i, cpu) { - affinity__set(&affinity, cpu); - - evlist__for_each_entry(evlist, pos) { - if (evsel__strcmp(pos, evsel_name)) - continue; - if (evsel__cpu_iter_skip(pos, cpu)) - continue; - if (pos->disabled || !evsel__is_group_leader(pos) || !pos->core.fd) - continue; - if (pos->immediate) - has_imm = true; - if (pos->immediate != imm) - continue; - evsel__disable_cpu(pos, pos->cpu_iter - 1); - } + for (int imm = 0; imm <= 1; imm++) { + evlist__for_each_cpu(evlist_cpu_itr, evlist, &affinity) { + pos = evlist_cpu_itr.evsel; + if (evsel__strcmp(pos, evsel_name)) + continue; + if (pos->disabled || !evsel__is_group_leader(pos) || !pos->core.fd) + continue; + if (pos->immediate) + has_imm = true; + if (pos->immediate != imm) + continue; + evsel__disable_cpu(pos, evlist_cpu_itr.cpu_map_idx); } if (!has_imm) break; @@ -462,24 +486,19 @@ void evlist__disable_evsel(struct evlist *evlist, char *evsel_name) static void __evlist__enable(struct evlist *evlist, char *evsel_name) { struct evsel *pos; + struct evlist_cpu_iterator evlist_cpu_itr; struct affinity affinity; - int cpu, i; if (affinity__setup(&affinity) < 0) return; - evlist__for_each_cpu(evlist, i, cpu) { - affinity__set(&affinity, cpu); - - evlist__for_each_entry(evlist, pos) { - if (evsel__strcmp(pos, evsel_name)) - continue; - if (evsel__cpu_iter_skip(pos, cpu)) - continue; - if (!evsel__is_group_leader(pos) || !pos->core.fd) - continue; - evsel__enable_cpu(pos, pos->cpu_iter - 1); - } + evlist__for_each_cpu(evlist_cpu_itr, evlist, &affinity) { + pos = evlist_cpu_itr.evsel; + if (evsel__strcmp(pos, evsel_name)) + continue; + if (!evsel__is_group_leader(pos) || !pos->core.fd) + continue; + evsel__enable_cpu(pos, evlist_cpu_itr.cpu_map_idx); } affinity__cleanup(&affinity); evlist__for_each_entry(evlist, pos) { @@ -1264,8 +1283,8 @@ void evlist__set_selected(struct evlist *evlist, struct evsel *evsel) void evlist__close(struct evlist *evlist) { struct evsel *evsel; + struct evlist_cpu_iterator evlist_cpu_itr; struct affinity affinity; - int cpu, i; /* * With perf record core.cpus is usually NULL. @@ -1279,15 +1298,12 @@ void evlist__close(struct evlist *evlist) if (affinity__setup(&affinity) < 0) return; - evlist__for_each_cpu(evlist, i, cpu) { - affinity__set(&affinity, cpu); - evlist__for_each_entry_reverse(evlist, evsel) { - if (evsel__cpu_iter_skip(evsel, cpu)) - continue; - perf_evsel__close_cpu(&evsel->core, evsel->cpu_iter - 1); - } + evlist__for_each_cpu(evlist_cpu_itr, evlist, &affinity) { + perf_evsel__close_cpu(&evlist_cpu_itr.evsel->core, + evlist_cpu_itr.cpu_map_idx); } + affinity__cleanup(&affinity); evlist__for_each_entry_reverse(evlist, evsel) { perf_evsel__free_fd(&evsel->core); diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h index 27594900a052..57828ebfcb61 100644 --- a/tools/perf/util/evlist.h +++ b/tools/perf/util/evlist.h @@ -327,17 +327,53 @@ void evlist__to_front(struct evlist *evlist, struct evsel *move_evsel); #define evlist__for_each_entry_safe(evlist, tmp, evsel) \ __evlist__for_each_entry_safe(&(evlist)->core.entries, tmp, evsel) -#define evlist__for_each_cpu(evlist, index, cpu) \ - evlist__cpu_iter_start(evlist); \ - perf_cpu_map__for_each_cpu (cpu, index, (evlist)->core.all_cpus) +/** Iterator state for evlist__for_each_cpu */ +struct evlist_cpu_iterator { + /** The list being iterated through. */ + struct evlist *container; + /** The current evsel of the iterator. */ + struct evsel *evsel; + /** The CPU map index corresponding to the evsel->core.cpus for the current CPU. */ + int cpu_map_idx; + /** + * The CPU map index corresponding to evlist->core.all_cpus for the + * current CPU. Distinct from cpu_map_idx as the evsel's cpu map may + * contain fewer entries. + */ + int evlist_cpu_map_idx; + /** The number of CPU map entries in evlist->core.all_cpus. */ + int evlist_cpu_map_nr; + /** The current CPU of the iterator. */ + int cpu; + /** If present, used to set the affinity when switching between CPUs. */ + struct affinity *affinity; +}; + +/** + * evlist__for_each_cpu - without affinity, iterate over the evlist. With + * affinity, iterate over all CPUs and then the evlist + * for each evsel on that CPU. When switching between + * CPUs the affinity is set to the CPU to avoid IPIs + * during syscalls. + * @evlist_cpu_itr: the iterator instance. + * @evlist: evlist instance to iterate. + * @affinity: NULL or used to set the affinity to the current CPU. + */ +#define evlist__for_each_cpu(evlist_cpu_itr, evlist, affinity) \ + for ((evlist_cpu_itr) = evlist__cpu_begin(evlist, affinity); \ + !evlist_cpu_iterator__end(&evlist_cpu_itr); \ + evlist_cpu_iterator__next(&evlist_cpu_itr)) + +/** Returns an iterator set to the first CPU/evsel of evlist. */ +struct evlist_cpu_iterator evlist__cpu_begin(struct evlist *evlist, struct affinity *affinity); +/** Move to next element in iterator, updating CPU, evsel and the affinity. */ +void evlist_cpu_iterator__next(struct evlist_cpu_iterator *evlist_cpu_itr); +/** Returns true when iterator is at the end of the CPUs and evlist. */ +bool evlist_cpu_iterator__end(const struct evlist_cpu_iterator *evlist_cpu_itr); struct evsel *evlist__get_tracking_event(struct evlist *evlist); void evlist__set_tracking_event(struct evlist *evlist, struct evsel *tracking_evsel); -void evlist__cpu_iter_start(struct evlist *evlist); -bool evsel__cpu_iter_skip(struct evsel *ev, int cpu); -bool evsel__cpu_iter_skip_no_inc(struct evsel *ev, int cpu); - struct evsel *evlist__find_evsel_by_str(struct evlist *evlist, const char *str); struct evsel *evlist__event2evsel(struct evlist *evlist, union perf_event *event); diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h index 99aa3363def7..7cb7c9c77ab0 100644 --- a/tools/perf/util/evsel.h +++ b/tools/perf/util/evsel.h @@ -121,7 +121,6 @@ struct evsel { bool errored; struct hashmap *per_pkg_mask; int err; - int cpu_iter; struct { evsel__sb_cb_t *cb; void *data; From 7ac0089d138f80dcd7ba8ca368a9b2bdfe780b16 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Tue, 4 Jan 2022 22:13:38 -0800 Subject: [PATCH 367/493] perf evsel: Pass cpu not cpu map index to synthesize evsel__write_stat_event() was incorrectly passing a cpu map index rather than a CPU to perf_event__synthesize_stat(). Signed-off-by: Ian Rogers Cc: Alexander Shishkin Cc: Andi Kleen Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: John Garry Cc: Kajol Jain Cc: Kan Liang Cc: Leo Yan Cc: Mark Rutland Cc: Mathieu Poirier Cc: Mike Leach Cc: Namhyung Kim Cc: Paul Clarke Cc: Peter Zijlstra Cc: Riccardo Mancini Cc: Stephane Eranian Cc: Suzuki Poulouse Cc: Vineet Singh Cc: coresight@lists.linaro.org Cc: linux-arm-kernel@lists.infradead.org Cc: zhengjun.xing@intel.com Link: https://lore.kernel.org/r/20220105061351.120843-36-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-stat.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index f84116c9e016..ed993c20772f 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -327,10 +327,11 @@ static int write_stat_round_event(u64 tm, u64 type) #define SID(e, x, y) xyarray__entry(e->core.sample_id, x, y) -static int evsel__write_stat_event(struct evsel *counter, u32 cpu, u32 thread, +static int evsel__write_stat_event(struct evsel *counter, int cpu_map_idx, u32 thread, struct perf_counts_values *count) { - struct perf_sample_id *sid = SID(counter, cpu, thread); + struct perf_sample_id *sid = SID(counter, cpu_map_idx, thread); + int cpu = perf_cpu_map__cpu(evsel__cpus(counter), cpu_map_idx); return perf_event__synthesize_stat(NULL, cpu, thread, sid->id, count, process_synthesized_event, NULL); From da8c94c065174099853a207d9716a49d339b265f Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Tue, 4 Jan 2022 22:13:39 -0800 Subject: [PATCH 368/493] perf stat: Correct variable name for read counter Switch from cpu to cpu_map_idx to reduce confusion. Signed-off-by: Ian Rogers Cc: Alexander Shishkin Cc: Andi Kleen Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: John Garry Cc: Kajol Jain Cc: Kan Liang Cc: Leo Yan Cc: Mark Rutland Cc: Mathieu Poirier Cc: Mike Leach Cc: Namhyung Kim Cc: Paul Clarke Cc: Peter Zijlstra Cc: Riccardo Mancini Cc: Stephane Eranian Cc: Suzuki Poulouse Cc: Vineet Singh Cc: coresight@lists.linaro.org Cc: linux-arm-kernel@lists.infradead.org Cc: zhengjun.xing@intel.com Link: https://lore.kernel.org/r/20220105061351.120843-37-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-stat.c | 24 ++++++++++++------------ tools/perf/util/evsel.c | 30 +++++++++++++++--------------- tools/perf/util/evsel.h | 16 ++++++++-------- 3 files changed, 35 insertions(+), 35 deletions(-) diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index ed993c20772f..dfb8f7847e6c 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -337,25 +337,25 @@ static int evsel__write_stat_event(struct evsel *counter, int cpu_map_idx, u32 t process_synthesized_event, NULL); } -static int read_single_counter(struct evsel *counter, int cpu, +static int read_single_counter(struct evsel *counter, int cpu_map_idx, int thread, struct timespec *rs) { if (counter->tool_event == PERF_TOOL_DURATION_TIME) { u64 val = rs->tv_nsec + rs->tv_sec*1000000000ULL; struct perf_counts_values *count = - perf_counts(counter->counts, cpu, thread); + perf_counts(counter->counts, cpu_map_idx, thread); count->ena = count->run = val; count->val = val; return 0; } - return evsel__read_counter(counter, cpu, thread); + return evsel__read_counter(counter, cpu_map_idx, thread); } /* * Read out the results of a single counter: * do not aggregate counts across CPUs in system-wide mode */ -static int read_counter_cpu(struct evsel *counter, struct timespec *rs, int cpu) +static int read_counter_cpu(struct evsel *counter, struct timespec *rs, int cpu_map_idx) { int nthreads = perf_thread_map__nr(evsel_list->core.threads); int thread; @@ -369,24 +369,24 @@ static int read_counter_cpu(struct evsel *counter, struct timespec *rs, int cpu) for (thread = 0; thread < nthreads; thread++) { struct perf_counts_values *count; - count = perf_counts(counter->counts, cpu, thread); + count = perf_counts(counter->counts, cpu_map_idx, thread); /* * The leader's group read loads data into its group members * (via evsel__read_counter()) and sets their count->loaded. */ - if (!perf_counts__is_loaded(counter->counts, cpu, thread) && - read_single_counter(counter, cpu, thread, rs)) { + if (!perf_counts__is_loaded(counter->counts, cpu_map_idx, thread) && + read_single_counter(counter, cpu_map_idx, thread, rs)) { counter->counts->scaled = -1; - perf_counts(counter->counts, cpu, thread)->ena = 0; - perf_counts(counter->counts, cpu, thread)->run = 0; + perf_counts(counter->counts, cpu_map_idx, thread)->ena = 0; + perf_counts(counter->counts, cpu_map_idx, thread)->run = 0; return -1; } - perf_counts__set_loaded(counter->counts, cpu, thread, false); + perf_counts__set_loaded(counter->counts, cpu_map_idx, thread, false); if (STAT_RECORD) { - if (evsel__write_stat_event(counter, cpu, thread, count)) { + if (evsel__write_stat_event(counter, cpu_map_idx, thread, count)) { pr_err("failed to write stat event\n"); return -1; } @@ -396,7 +396,7 @@ static int read_counter_cpu(struct evsel *counter, struct timespec *rs, int cpu) fprintf(stat_config.output, "%s: %d: %" PRIu64 " %" PRIu64 " %" PRIu64 "\n", evsel__name(counter), - cpu, + perf_cpu_map__cpu(evsel__cpus(counter), cpu_map_idx), count->val, count->ena, count->run); } } diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 2de569a1a272..8f539a81b30b 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -1476,11 +1476,11 @@ void evsel__compute_deltas(struct evsel *evsel, int cpu, int thread, count->run = count->run - tmp.run; } -static int evsel__read_one(struct evsel *evsel, int cpu, int thread) +static int evsel__read_one(struct evsel *evsel, int cpu_map_idx, int thread) { - struct perf_counts_values *count = perf_counts(evsel->counts, cpu, thread); + struct perf_counts_values *count = perf_counts(evsel->counts, cpu_map_idx, thread); - return perf_evsel__read(&evsel->core, cpu, thread, count); + return perf_evsel__read(&evsel->core, cpu_map_idx, thread, count); } static void evsel__set_count(struct evsel *counter, int cpu, int thread, u64 val, u64 ena, u64 run) @@ -1530,7 +1530,7 @@ static int evsel__process_group_data(struct evsel *leader, int cpu, int thread, return 0; } -static int evsel__read_group(struct evsel *leader, int cpu, int thread) +static int evsel__read_group(struct evsel *leader, int cpu_map_idx, int thread) { struct perf_stat_evsel *ps = leader->stats; u64 read_format = leader->core.attr.read_format; @@ -1551,42 +1551,42 @@ static int evsel__read_group(struct evsel *leader, int cpu, int thread) ps->group_data = data; } - if (FD(leader, cpu, thread) < 0) + if (FD(leader, cpu_map_idx, thread) < 0) return -EINVAL; - if (readn(FD(leader, cpu, thread), data, size) <= 0) + if (readn(FD(leader, cpu_map_idx, thread), data, size) <= 0) return -errno; - return evsel__process_group_data(leader, cpu, thread, data); + return evsel__process_group_data(leader, cpu_map_idx, thread, data); } -int evsel__read_counter(struct evsel *evsel, int cpu, int thread) +int evsel__read_counter(struct evsel *evsel, int cpu_map_idx, int thread) { u64 read_format = evsel->core.attr.read_format; if (read_format & PERF_FORMAT_GROUP) - return evsel__read_group(evsel, cpu, thread); + return evsel__read_group(evsel, cpu_map_idx, thread); - return evsel__read_one(evsel, cpu, thread); + return evsel__read_one(evsel, cpu_map_idx, thread); } -int __evsel__read_on_cpu(struct evsel *evsel, int cpu, int thread, bool scale) +int __evsel__read_on_cpu(struct evsel *evsel, int cpu_map_idx, int thread, bool scale) { struct perf_counts_values count; size_t nv = scale ? 3 : 1; - if (FD(evsel, cpu, thread) < 0) + if (FD(evsel, cpu_map_idx, thread) < 0) return -EINVAL; if (evsel->counts == NULL && evsel__alloc_counts(evsel) < 0) return -ENOMEM; - if (readn(FD(evsel, cpu, thread), &count, nv * sizeof(u64)) <= 0) + if (readn(FD(evsel, cpu_map_idx, thread), &count, nv * sizeof(u64)) <= 0) return -errno; - evsel__compute_deltas(evsel, cpu, thread, &count); + evsel__compute_deltas(evsel, cpu_map_idx, thread, &count); perf_counts_values__scale(&count, scale, NULL); - *perf_counts(evsel->counts, cpu, thread) = count; + *perf_counts(evsel->counts, cpu_map_idx, thread) = count; return 0; } diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h index 7cb7c9c77ab0..c3db41282400 100644 --- a/tools/perf/util/evsel.h +++ b/tools/perf/util/evsel.h @@ -333,32 +333,32 @@ static inline bool evsel__match2(struct evsel *e1, struct evsel *e2) (e1->core.attr.config == e2->core.attr.config); } -int evsel__read_counter(struct evsel *evsel, int cpu, int thread); +int evsel__read_counter(struct evsel *evsel, int cpu_map_idx, int thread); -int __evsel__read_on_cpu(struct evsel *evsel, int cpu, int thread, bool scale); +int __evsel__read_on_cpu(struct evsel *evsel, int cpu_map_idx, int thread, bool scale); /** * evsel__read_on_cpu - Read out the results on a CPU and thread * * @evsel - event selector to read value - * @cpu - CPU of interest + * @cpu_map_idx - CPU of interest * @thread - thread of interest */ -static inline int evsel__read_on_cpu(struct evsel *evsel, int cpu, int thread) +static inline int evsel__read_on_cpu(struct evsel *evsel, int cpu_map_idx, int thread) { - return __evsel__read_on_cpu(evsel, cpu, thread, false); + return __evsel__read_on_cpu(evsel, cpu_map_idx, thread, false); } /** * evsel__read_on_cpu_scaled - Read out the results on a CPU and thread, scaled * * @evsel - event selector to read value - * @cpu - CPU of interest + * @cpu_map_idx - CPU of interest * @thread - thread of interest */ -static inline int evsel__read_on_cpu_scaled(struct evsel *evsel, int cpu, int thread) +static inline int evsel__read_on_cpu_scaled(struct evsel *evsel, int cpu_map_idx, int thread) { - return __evsel__read_on_cpu(evsel, cpu, thread, true); + return __evsel__read_on_cpu(evsel, cpu_map_idx, thread, true); } int evsel__parse_sample(struct evsel *evsel, union perf_event *event, From 2daa08c4d9cd9d0845094d718920e5d105c11558 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Tue, 4 Jan 2022 22:13:40 -0800 Subject: [PATCH 369/493] perf evsel: Rename CPU around get_group_fd CPU is really a cpu map index, change names to make code more intention revealing. Signed-off-by: Ian Rogers Cc: Alexander Shishkin Cc: Andi Kleen Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: John Garry Cc: Kajol Jain Cc: Kan Liang Cc: Leo Yan Cc: Mark Rutland Cc: Mathieu Poirier Cc: Mike Leach Cc: Namhyung Kim Cc: Paul Clarke Cc: Peter Zijlstra Cc: Riccardo Mancini Cc: Stephane Eranian Cc: Suzuki Poulouse Cc: Vineet Singh Cc: coresight@lists.linaro.org Cc: linux-arm-kernel@lists.infradead.org Cc: zhengjun.xing@intel.com Link: https://lore.kernel.org/r/20220105061351.120843-38-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/evsel.c | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 8f539a81b30b..45338be3f501 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -1591,27 +1591,27 @@ int __evsel__read_on_cpu(struct evsel *evsel, int cpu_map_idx, int thread, bool } static int evsel__match_other_cpu(struct evsel *evsel, struct evsel *other, - int cpu) + int cpu_map_idx) { - int cpuid; + int cpu; - cpuid = perf_cpu_map__cpu(evsel->core.cpus, cpu); - return perf_cpu_map__idx(other->core.cpus, cpuid); + cpu = perf_cpu_map__cpu(evsel->core.cpus, cpu_map_idx); + return perf_cpu_map__idx(other->core.cpus, cpu); } -static int evsel__hybrid_group_cpu(struct evsel *evsel, int cpu) +static int evsel__hybrid_group_cpu_map_idx(struct evsel *evsel, int cpu_map_idx) { struct evsel *leader = evsel__leader(evsel); if ((evsel__is_hybrid(evsel) && !evsel__is_hybrid(leader)) || (!evsel__is_hybrid(evsel) && evsel__is_hybrid(leader))) { - return evsel__match_other_cpu(evsel, leader, cpu); + return evsel__match_other_cpu(evsel, leader, cpu_map_idx); } - return cpu; + return cpu_map_idx; } -static int get_group_fd(struct evsel *evsel, int cpu, int thread) +static int get_group_fd(struct evsel *evsel, int cpu_map_idx, int thread) { struct evsel *leader = evsel__leader(evsel); int fd; @@ -1625,11 +1625,11 @@ static int get_group_fd(struct evsel *evsel, int cpu, int thread) */ BUG_ON(!leader->core.fd); - cpu = evsel__hybrid_group_cpu(evsel, cpu); - if (cpu == -1) + cpu_map_idx = evsel__hybrid_group_cpu_map_idx(evsel, cpu_map_idx); + if (cpu_map_idx == -1) return -1; - fd = FD(leader, cpu, thread); + fd = FD(leader, cpu_map_idx, thread); BUG_ON(fd == -1); return fd; From 1fa497d4c01d497e25131ccdd5def6f24dd1f330 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Tue, 4 Jan 2022 22:13:41 -0800 Subject: [PATCH 370/493] perf evsel: Reduce scope of evsel__ignore_missing_thread Move to being static. Signed-off-by: Ian Rogers Cc: Alexander Shishkin Cc: Andi Kleen Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: John Garry Cc: Kajol Jain Cc: Kan Liang Cc: Leo Yan Cc: Mark Rutland Cc: Mathieu Poirier Cc: Mike Leach Cc: Namhyung Kim Cc: Paul Clarke Cc: Peter Zijlstra Cc: Riccardo Mancini Cc: Stephane Eranian Cc: Suzuki Poulouse Cc: Vineet Singh Cc: coresight@lists.linaro.org Cc: linux-arm-kernel@lists.infradead.org Cc: zhengjun.xing@intel.com Link: https://lore.kernel.org/r/20220105061351.120843-39-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/evsel.c | 8 ++++---- tools/perf/util/evsel.h | 4 ---- 2 files changed, 4 insertions(+), 8 deletions(-) diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 45338be3f501..97348d302156 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -1666,10 +1666,10 @@ static int update_fds(struct evsel *evsel, return 0; } -bool evsel__ignore_missing_thread(struct evsel *evsel, - int nr_cpus, int cpu, - struct perf_thread_map *threads, - int thread, int err) +static bool evsel__ignore_missing_thread(struct evsel *evsel, + int nr_cpus, int cpu, + struct perf_thread_map *threads, + int thread, int err) { pid_t ignore_pid = perf_thread_map__pid(threads, thread); diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h index c3db41282400..84e597f6c395 100644 --- a/tools/perf/util/evsel.h +++ b/tools/perf/util/evsel.h @@ -301,10 +301,6 @@ bool evsel__detect_missing_features(struct evsel *evsel); enum rlimit_action { NO_CHANGE, SET_TO_MAX, INCREASED_MAX }; bool evsel__increase_rlimit(enum rlimit_action *set_rlimit); -bool evsel__ignore_missing_thread(struct evsel *evsel, - int nr_cpus, int cpu, - struct perf_thread_map *threads, - int thread, int err); bool evsel__precise_ip_fallback(struct evsel *evsel); struct perf_sample; From 6f844b1fdd3bc3a25995ff83edea32a73bfa72d9 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Tue, 4 Jan 2022 22:13:42 -0800 Subject: [PATCH 371/493] perf evsel: Rename variable cpu to index Make naming less error prone. Signed-off-by: Ian Rogers Cc: Alexander Shishkin Cc: Andi Kleen Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: John Garry Cc: Kajol Jain Cc: Kan Liang Cc: Leo Yan Cc: Mark Rutland Cc: Mathieu Poirier Cc: Mike Leach Cc: Namhyung Kim Cc: Paul Clarke Cc: Peter Zijlstra Cc: Riccardo Mancini Cc: Stephane Eranian Cc: Suzuki Poulouse Cc: Vineet Singh Cc: coresight@lists.linaro.org Cc: linux-arm-kernel@lists.infradead.org Cc: zhengjun.xing@intel.com Link: https://lore.kernel.org/r/20220105061351.120843-40-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/evsel.c | 83 +++++++++++++++++++++-------------------- tools/perf/util/evsel.h | 6 +-- tools/perf/util/stat.c | 4 +- tools/perf/util/stat.h | 2 +- 4 files changed, 48 insertions(+), 47 deletions(-) diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 97348d302156..796923c80ff6 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -1372,9 +1372,9 @@ int evsel__append_addr_filter(struct evsel *evsel, const char *filter) } /* Caller has to clear disabled after going through all CPUs. */ -int evsel__enable_cpu(struct evsel *evsel, int cpu) +int evsel__enable_cpu(struct evsel *evsel, int cpu_map_idx) { - return perf_evsel__enable_cpu(&evsel->core, cpu); + return perf_evsel__enable_cpu(&evsel->core, cpu_map_idx); } int evsel__enable(struct evsel *evsel) @@ -1387,9 +1387,9 @@ int evsel__enable(struct evsel *evsel) } /* Caller has to set disabled after going through all CPUs. */ -int evsel__disable_cpu(struct evsel *evsel, int cpu) +int evsel__disable_cpu(struct evsel *evsel, int cpu_map_idx) { - return perf_evsel__disable_cpu(&evsel->core, cpu); + return perf_evsel__disable_cpu(&evsel->core, cpu_map_idx); } int evsel__disable(struct evsel *evsel) @@ -1455,7 +1455,7 @@ void evsel__delete(struct evsel *evsel) free(evsel); } -void evsel__compute_deltas(struct evsel *evsel, int cpu, int thread, +void evsel__compute_deltas(struct evsel *evsel, int cpu_map_idx, int thread, struct perf_counts_values *count) { struct perf_counts_values tmp; @@ -1463,12 +1463,12 @@ void evsel__compute_deltas(struct evsel *evsel, int cpu, int thread, if (!evsel->prev_raw_counts) return; - if (cpu == -1) { + if (cpu_map_idx == -1) { tmp = evsel->prev_raw_counts->aggr; evsel->prev_raw_counts->aggr = *count; } else { - tmp = *perf_counts(evsel->prev_raw_counts, cpu, thread); - *perf_counts(evsel->prev_raw_counts, cpu, thread) = *count; + tmp = *perf_counts(evsel->prev_raw_counts, cpu_map_idx, thread); + *perf_counts(evsel->prev_raw_counts, cpu_map_idx, thread) = *count; } count->val = count->val - tmp.val; @@ -1483,20 +1483,21 @@ static int evsel__read_one(struct evsel *evsel, int cpu_map_idx, int thread) return perf_evsel__read(&evsel->core, cpu_map_idx, thread, count); } -static void evsel__set_count(struct evsel *counter, int cpu, int thread, u64 val, u64 ena, u64 run) +static void evsel__set_count(struct evsel *counter, int cpu_map_idx, int thread, + u64 val, u64 ena, u64 run) { struct perf_counts_values *count; - count = perf_counts(counter->counts, cpu, thread); + count = perf_counts(counter->counts, cpu_map_idx, thread); count->val = val; count->ena = ena; count->run = run; - perf_counts__set_loaded(counter->counts, cpu, thread, true); + perf_counts__set_loaded(counter->counts, cpu_map_idx, thread, true); } -static int evsel__process_group_data(struct evsel *leader, int cpu, int thread, u64 *data) +static int evsel__process_group_data(struct evsel *leader, int cpu_map_idx, int thread, u64 *data) { u64 read_format = leader->core.attr.read_format; struct sample_read_value *v; @@ -1515,7 +1516,7 @@ static int evsel__process_group_data(struct evsel *leader, int cpu, int thread, v = (struct sample_read_value *) data; - evsel__set_count(leader, cpu, thread, v[0].value, ena, run); + evsel__set_count(leader, cpu_map_idx, thread, v[0].value, ena, run); for (i = 1; i < nr; i++) { struct evsel *counter; @@ -1524,7 +1525,7 @@ static int evsel__process_group_data(struct evsel *leader, int cpu, int thread, if (!counter) return -EINVAL; - evsel__set_count(counter, cpu, thread, v[i].value, ena, run); + evsel__set_count(counter, cpu_map_idx, thread, v[i].value, ena, run); } return 0; @@ -1643,16 +1644,16 @@ static void evsel__remove_fd(struct evsel *pos, int nr_cpus, int nr_threads, int } static int update_fds(struct evsel *evsel, - int nr_cpus, int cpu_idx, + int nr_cpus, int cpu_map_idx, int nr_threads, int thread_idx) { struct evsel *pos; - if (cpu_idx >= nr_cpus || thread_idx >= nr_threads) + if (cpu_map_idx >= nr_cpus || thread_idx >= nr_threads) return -EINVAL; evlist__for_each_entry(evsel->evlist, pos) { - nr_cpus = pos != evsel ? nr_cpus : cpu_idx; + nr_cpus = pos != evsel ? nr_cpus : cpu_map_idx; evsel__remove_fd(pos, nr_cpus, nr_threads, thread_idx); @@ -1667,7 +1668,7 @@ static int update_fds(struct evsel *evsel, } static bool evsel__ignore_missing_thread(struct evsel *evsel, - int nr_cpus, int cpu, + int nr_cpus, int cpu_map_idx, struct perf_thread_map *threads, int thread, int err) { @@ -1692,7 +1693,7 @@ static bool evsel__ignore_missing_thread(struct evsel *evsel, * We should remove fd for missing_thread first * because thread_map__remove() will decrease threads->nr. */ - if (update_fds(evsel, nr_cpus, cpu, threads->nr, thread)) + if (update_fds(evsel, nr_cpus, cpu_map_idx, threads->nr, thread)) return false; if (thread_map__remove(threads, thread)) @@ -1974,9 +1975,9 @@ bool evsel__increase_rlimit(enum rlimit_action *set_rlimit) static int evsel__open_cpu(struct evsel *evsel, struct perf_cpu_map *cpus, struct perf_thread_map *threads, - int start_cpu, int end_cpu) + int start_cpu_map_idx, int end_cpu_map_idx) { - int cpu, thread, nthreads; + int idx, thread, nthreads; int pid = -1, err, old_errno; enum rlimit_action set_rlimit = NO_CHANGE; @@ -2003,7 +2004,7 @@ fallback_missing_features: display_attr(&evsel->core.attr); - for (cpu = start_cpu; cpu < end_cpu; cpu++) { + for (idx = start_cpu_map_idx; idx < end_cpu_map_idx; idx++) { for (thread = 0; thread < nthreads; thread++) { int fd, group_fd; @@ -2014,17 +2015,17 @@ retry_open: if (!evsel->cgrp && !evsel->core.system_wide) pid = perf_thread_map__pid(threads, thread); - group_fd = get_group_fd(evsel, cpu, thread); + group_fd = get_group_fd(evsel, idx, thread); test_attr__ready(); pr_debug2_peo("sys_perf_event_open: pid %d cpu %d group_fd %d flags %#lx", - pid, cpus->map[cpu], group_fd, evsel->open_flags); + pid, cpus->map[idx], group_fd, evsel->open_flags); - fd = sys_perf_event_open(&evsel->core.attr, pid, cpus->map[cpu], + fd = sys_perf_event_open(&evsel->core.attr, pid, cpus->map[idx], group_fd, evsel->open_flags); - FD(evsel, cpu, thread) = fd; + FD(evsel, idx, thread) = fd; if (fd < 0) { err = -errno; @@ -2034,10 +2035,10 @@ retry_open: goto try_fallback; } - bpf_counter__install_pe(evsel, cpu, fd); + bpf_counter__install_pe(evsel, idx, fd); if (unlikely(test_attr__enabled)) { - test_attr__open(&evsel->core.attr, pid, cpus->map[cpu], + test_attr__open(&evsel->core.attr, pid, cpus->map[idx], fd, group_fd, evsel->open_flags); } @@ -2078,7 +2079,7 @@ try_fallback: if (evsel__precise_ip_fallback(evsel)) goto retry_open; - if (evsel__ignore_missing_thread(evsel, cpus->nr, cpu, threads, thread, err)) { + if (evsel__ignore_missing_thread(evsel, cpus->nr, idx, threads, thread, err)) { /* We just removed 1 thread, so lower the upper nthreads limit. */ nthreads--; @@ -2093,7 +2094,7 @@ try_fallback: if (err == -EMFILE && evsel__increase_rlimit(&set_rlimit)) goto retry_open; - if (err != -EINVAL || cpu > 0 || thread > 0) + if (err != -EINVAL || idx > 0 || thread > 0) goto out_close; if (evsel__detect_missing_features(evsel)) @@ -2105,12 +2106,12 @@ out_close: old_errno = errno; do { while (--thread >= 0) { - if (FD(evsel, cpu, thread) >= 0) - close(FD(evsel, cpu, thread)); - FD(evsel, cpu, thread) = -1; + if (FD(evsel, idx, thread) >= 0) + close(FD(evsel, idx, thread)); + FD(evsel, idx, thread) = -1; } thread = nthreads; - } while (--cpu >= 0); + } while (--idx >= 0); errno = old_errno; return err; } @@ -2127,13 +2128,13 @@ void evsel__close(struct evsel *evsel) perf_evsel__free_id(&evsel->core); } -int evsel__open_per_cpu(struct evsel *evsel, struct perf_cpu_map *cpus, int cpu) +int evsel__open_per_cpu(struct evsel *evsel, struct perf_cpu_map *cpus, int cpu_map_idx) { - if (cpu == -1) + if (cpu_map_idx == -1) return evsel__open_cpu(evsel, cpus, NULL, 0, cpus ? cpus->nr : 1); - return evsel__open_cpu(evsel, cpus, NULL, cpu, cpu + 1); + return evsel__open_cpu(evsel, cpus, NULL, cpu_map_idx, cpu_map_idx + 1); } int evsel__open_per_thread(struct evsel *evsel, struct perf_thread_map *threads) @@ -2958,15 +2959,15 @@ struct perf_env *evsel__env(struct evsel *evsel) static int store_evsel_ids(struct evsel *evsel, struct evlist *evlist) { - int cpu, thread; + int cpu_map_idx, thread; - for (cpu = 0; cpu < xyarray__max_x(evsel->core.fd); cpu++) { + for (cpu_map_idx = 0; cpu_map_idx < xyarray__max_x(evsel->core.fd); cpu_map_idx++) { for (thread = 0; thread < xyarray__max_y(evsel->core.fd); thread++) { - int fd = FD(evsel, cpu, thread); + int fd = FD(evsel, cpu_map_idx, thread); if (perf_evlist__id_add_fd(&evlist->core, &evsel->core, - cpu, thread, fd) < 0) + cpu_map_idx, thread, fd) < 0) return -1; } } diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h index 84e597f6c395..5720ceebffac 100644 --- a/tools/perf/util/evsel.h +++ b/tools/perf/util/evsel.h @@ -284,12 +284,12 @@ void arch_evsel__fixup_new_cycles(struct perf_event_attr *attr); int evsel__set_filter(struct evsel *evsel, const char *filter); int evsel__append_tp_filter(struct evsel *evsel, const char *filter); int evsel__append_addr_filter(struct evsel *evsel, const char *filter); -int evsel__enable_cpu(struct evsel *evsel, int cpu); +int evsel__enable_cpu(struct evsel *evsel, int cpu_map_idx); int evsel__enable(struct evsel *evsel); int evsel__disable(struct evsel *evsel); -int evsel__disable_cpu(struct evsel *evsel, int cpu); +int evsel__disable_cpu(struct evsel *evsel, int cpu_map_idx); -int evsel__open_per_cpu(struct evsel *evsel, struct perf_cpu_map *cpus, int cpu); +int evsel__open_per_cpu(struct evsel *evsel, struct perf_cpu_map *cpus, int cpu_map_idx); int evsel__open_per_thread(struct evsel *evsel, struct perf_thread_map *threads); int evsel__open(struct evsel *evsel, struct perf_cpu_map *cpus, struct perf_thread_map *threads); diff --git a/tools/perf/util/stat.c b/tools/perf/util/stat.c index 995cb5003133..f7f9757eba23 100644 --- a/tools/perf/util/stat.c +++ b/tools/perf/util/stat.c @@ -531,7 +531,7 @@ size_t perf_event__fprintf_stat_config(union perf_event *event, FILE *fp) int create_perf_stat_counter(struct evsel *evsel, struct perf_stat_config *config, struct target *target, - int cpu) + int cpu_map_idx) { struct perf_event_attr *attr = &evsel->core.attr; struct evsel *leader = evsel__leader(evsel); @@ -585,7 +585,7 @@ int create_perf_stat_counter(struct evsel *evsel, } if (target__has_cpu(target) && !target__has_per_thread(target)) - return evsel__open_per_cpu(evsel, evsel__cpus(evsel), cpu); + return evsel__open_per_cpu(evsel, evsel__cpus(evsel), cpu_map_idx); return evsel__open_per_thread(evsel, evsel->core.threads); } diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h index 32cf24186229..5e25d53e891b 100644 --- a/tools/perf/util/stat.h +++ b/tools/perf/util/stat.h @@ -248,7 +248,7 @@ size_t perf_event__fprintf_stat_config(union perf_event *event, FILE *fp); int create_perf_stat_counter(struct evsel *evsel, struct perf_stat_config *config, struct target *target, - int cpu); + int cpu_map_idx); void evlist__print_counters(struct evlist *evlist, struct perf_stat_config *config, struct target *_target, struct timespec *ts, int argc, const char **argv); From aa11e55a39950c0151e12abd30c7223dfc6f6a2c Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Tue, 4 Jan 2022 22:13:43 -0800 Subject: [PATCH 372/493] perf test: Use perf_cpu_map__for_each_cpu() Clean up variable naming to make cpu and index clearer. Signed-off-by: Ian Rogers Cc: Alexander Shishkin Cc: Andi Kleen Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: John Garry Cc: Kajol Jain Cc: Kan Liang Cc: Leo Yan Cc: Mark Rutland Cc: Mathieu Poirier Cc: Mike Leach Cc: Namhyung Kim Cc: Paul Clarke Cc: Peter Zijlstra Cc: Riccardo Mancini Cc: Stephane Eranian Cc: Suzuki Poulouse Cc: Vineet Singh Cc: coresight@lists.linaro.org Cc: linux-arm-kernel@lists.infradead.org Cc: zhengjun.xing@intel.com Link: https://lore.kernel.org/r/20220105061351.120843-41-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/openat-syscall-all-cpus.c | 28 +++++++++++----------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/tools/perf/tests/openat-syscall-all-cpus.c b/tools/perf/tests/openat-syscall-all-cpus.c index 544db0839b3b..ca0a50e92839 100644 --- a/tools/perf/tests/openat-syscall-all-cpus.c +++ b/tools/perf/tests/openat-syscall-all-cpus.c @@ -22,7 +22,7 @@ static int test__openat_syscall_event_on_all_cpus(struct test_suite *test __maybe_unused, int subtest __maybe_unused) { - int err = -1, fd, cpu; + int err = -1, fd, idx, cpu; struct perf_cpu_map *cpus; struct evsel *evsel; unsigned int nr_openat_calls = 111, i; @@ -58,23 +58,23 @@ static int test__openat_syscall_event_on_all_cpus(struct test_suite *test __mayb goto out_evsel_delete; } - for (cpu = 0; cpu < cpus->nr; ++cpu) { - unsigned int ncalls = nr_openat_calls + cpu; + perf_cpu_map__for_each_cpu(cpu, idx, cpus) { + unsigned int ncalls = nr_openat_calls + idx; /* * XXX eventually lift this restriction in a way that * keeps perf building on older glibc installations * without CPU_ALLOC. 1024 cpus in 2010 still seems * a reasonable upper limit tho :-) */ - if (cpus->map[cpu] >= CPU_SETSIZE) { - pr_debug("Ignoring CPU %d\n", cpus->map[cpu]); + if (cpu >= CPU_SETSIZE) { + pr_debug("Ignoring CPU %d\n", cpu); continue; } - CPU_SET(cpus->map[cpu], &cpu_set); + CPU_SET(cpu, &cpu_set); if (sched_setaffinity(0, sizeof(cpu_set), &cpu_set) < 0) { pr_debug("sched_setaffinity() failed on CPU %d: %s ", - cpus->map[cpu], + cpu, str_error_r(errno, sbuf, sizeof(sbuf))); goto out_close_fd; } @@ -82,29 +82,29 @@ static int test__openat_syscall_event_on_all_cpus(struct test_suite *test __mayb fd = openat(0, "/etc/passwd", O_RDONLY); close(fd); } - CPU_CLR(cpus->map[cpu], &cpu_set); + CPU_CLR(cpu, &cpu_set); } evsel->core.cpus = perf_cpu_map__get(cpus); err = 0; - for (cpu = 0; cpu < cpus->nr; ++cpu) { + perf_cpu_map__for_each_cpu(cpu, idx, cpus) { unsigned int expected; - if (cpus->map[cpu] >= CPU_SETSIZE) + if (cpu >= CPU_SETSIZE) continue; - if (evsel__read_on_cpu(evsel, cpu, 0) < 0) { + if (evsel__read_on_cpu(evsel, idx, 0) < 0) { pr_debug("evsel__read_on_cpu\n"); err = -1; break; } - expected = nr_openat_calls + cpu; - if (perf_counts(evsel->counts, cpu, 0)->val != expected) { + expected = nr_openat_calls + idx; + if (perf_counts(evsel->counts, idx, 0)->val != expected) { pr_debug("evsel__read_on_cpu: expected to intercept %d calls on cpu %d, got %" PRIu64 "\n", - expected, cpus->map[cpu], perf_counts(evsel->counts, cpu, 0)->val); + expected, cpu, perf_counts(evsel->counts, idx, 0)->val); err = -1; } } From 379c224bef724cf52bd3d2364d29fc63f3e743d3 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Tue, 4 Jan 2022 22:13:44 -0800 Subject: [PATCH 373/493] perf stat: Correct check_per_pkg() cpu Code was incorrectly using the cpu map index as the CPU. Signed-off-by: Ian Rogers Cc: Alexander Shishkin Cc: Andi Kleen Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: John Garry Cc: Kajol Jain Cc: Kan Liang Cc: Leo Yan Cc: Mark Rutland Cc: Mathieu Poirier Cc: Mike Leach Cc: Namhyung Kim Cc: Paul Clarke Cc: Peter Zijlstra Cc: Riccardo Mancini Cc: Stephane Eranian Cc: Suzuki Poulouse Cc: Vineet Singh Cc: coresight@lists.linaro.org Cc: linux-arm-kernel@lists.infradead.org Cc: zhengjun.xing@intel.com Link: https://lore.kernel.org/r/20220105061351.120843-42-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/stat.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tools/perf/util/stat.c b/tools/perf/util/stat.c index f7f9757eba23..86ab427e87fc 100644 --- a/tools/perf/util/stat.c +++ b/tools/perf/util/stat.c @@ -292,11 +292,12 @@ static bool pkg_id_equal(const void *__key1, const void *__key2, return *key1 == *key2; } -static int check_per_pkg(struct evsel *counter, - struct perf_counts_values *vals, int cpu, bool *skip) +static int check_per_pkg(struct evsel *counter, struct perf_counts_values *vals, + int cpu_map_idx, bool *skip) { struct hashmap *mask = counter->per_pkg_mask; struct perf_cpu_map *cpus = evsel__cpus(counter); + int cpu = perf_cpu_map__cpu(cpus, cpu_map_idx); int s, d, ret = 0; uint64_t *key; From 5b1af93dbc7e64ab1b872129cfe1f2318cc29c67 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Tue, 4 Jan 2022 22:13:45 -0800 Subject: [PATCH 374/493] perf stat: Swap variable name cpu to index The use of CPU is error prone, switch to cpu_map_idx. Signed-off-by: Ian Rogers Cc: Alexander Shishkin Cc: Andi Kleen Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: John Garry Cc: Kajol Jain Cc: Kan Liang Cc: Leo Yan Cc: Mark Rutland Cc: Mathieu Poirier Cc: Mike Leach Cc: Namhyung Kim Cc: Paul Clarke Cc: Peter Zijlstra Cc: Riccardo Mancini Cc: Stephane Eranian Cc: Suzuki Poulouse Cc: Vineet Singh Cc: coresight@lists.linaro.org Cc: linux-arm-kernel@lists.infradead.org Cc: zhengjun.xing@intel.com Link: https://lore.kernel.org/r/20220105061351.120843-43-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/stat-shadow.c | 308 +++++++++++++++++----------------- tools/perf/util/stat.c | 16 +- tools/perf/util/stat.h | 4 +- 3 files changed, 164 insertions(+), 164 deletions(-) diff --git a/tools/perf/util/stat-shadow.c b/tools/perf/util/stat-shadow.c index 5c7308efa768..10af7804e482 100644 --- a/tools/perf/util/stat-shadow.c +++ b/tools/perf/util/stat-shadow.c @@ -32,7 +32,7 @@ struct saved_value { struct evsel *evsel; enum stat_type type; int ctx; - int cpu; + int cpu_map_idx; struct cgroup *cgrp; struct runtime_stat *stat; struct stats stats; @@ -47,8 +47,8 @@ static int saved_value_cmp(struct rb_node *rb_node, const void *entry) rb_node); const struct saved_value *b = entry; - if (a->cpu != b->cpu) - return a->cpu - b->cpu; + if (a->cpu_map_idx != b->cpu_map_idx) + return a->cpu_map_idx - b->cpu_map_idx; /* * Previously the rbtree was used to link generic metrics. @@ -105,7 +105,7 @@ static void saved_value_delete(struct rblist *rblist __maybe_unused, } static struct saved_value *saved_value_lookup(struct evsel *evsel, - int cpu, + int cpu_map_idx, bool create, enum stat_type type, int ctx, @@ -115,7 +115,7 @@ static struct saved_value *saved_value_lookup(struct evsel *evsel, struct rblist *rblist; struct rb_node *nd; struct saved_value dm = { - .cpu = cpu, + .cpu_map_idx = cpu_map_idx, .evsel = evsel, .type = type, .ctx = ctx, @@ -213,10 +213,10 @@ struct runtime_stat_data { static void update_runtime_stat(struct runtime_stat *st, enum stat_type type, - int cpu, u64 count, + int cpu_map_idx, u64 count, struct runtime_stat_data *rsd) { - struct saved_value *v = saved_value_lookup(NULL, cpu, true, type, + struct saved_value *v = saved_value_lookup(NULL, cpu_map_idx, true, type, rsd->ctx, st, rsd->cgrp); if (v) @@ -229,7 +229,7 @@ static void update_runtime_stat(struct runtime_stat *st, * instruction rates, etc: */ void perf_stat__update_shadow_stats(struct evsel *counter, u64 count, - int cpu, struct runtime_stat *st) + int cpu_map_idx, struct runtime_stat *st) { u64 count_ns = count; struct saved_value *v; @@ -241,88 +241,88 @@ void perf_stat__update_shadow_stats(struct evsel *counter, u64 count, count *= counter->scale; if (evsel__is_clock(counter)) - update_runtime_stat(st, STAT_NSECS, cpu, count_ns, &rsd); + update_runtime_stat(st, STAT_NSECS, cpu_map_idx, count_ns, &rsd); else if (evsel__match(counter, HARDWARE, HW_CPU_CYCLES)) - update_runtime_stat(st, STAT_CYCLES, cpu, count, &rsd); + update_runtime_stat(st, STAT_CYCLES, cpu_map_idx, count, &rsd); else if (perf_stat_evsel__is(counter, CYCLES_IN_TX)) - update_runtime_stat(st, STAT_CYCLES_IN_TX, cpu, count, &rsd); + update_runtime_stat(st, STAT_CYCLES_IN_TX, cpu_map_idx, count, &rsd); else if (perf_stat_evsel__is(counter, TRANSACTION_START)) - update_runtime_stat(st, STAT_TRANSACTION, cpu, count, &rsd); + update_runtime_stat(st, STAT_TRANSACTION, cpu_map_idx, count, &rsd); else if (perf_stat_evsel__is(counter, ELISION_START)) - update_runtime_stat(st, STAT_ELISION, cpu, count, &rsd); + update_runtime_stat(st, STAT_ELISION, cpu_map_idx, count, &rsd); else if (perf_stat_evsel__is(counter, TOPDOWN_TOTAL_SLOTS)) update_runtime_stat(st, STAT_TOPDOWN_TOTAL_SLOTS, - cpu, count, &rsd); + cpu_map_idx, count, &rsd); else if (perf_stat_evsel__is(counter, TOPDOWN_SLOTS_ISSUED)) update_runtime_stat(st, STAT_TOPDOWN_SLOTS_ISSUED, - cpu, count, &rsd); + cpu_map_idx, count, &rsd); else if (perf_stat_evsel__is(counter, TOPDOWN_SLOTS_RETIRED)) update_runtime_stat(st, STAT_TOPDOWN_SLOTS_RETIRED, - cpu, count, &rsd); + cpu_map_idx, count, &rsd); else if (perf_stat_evsel__is(counter, TOPDOWN_FETCH_BUBBLES)) update_runtime_stat(st, STAT_TOPDOWN_FETCH_BUBBLES, - cpu, count, &rsd); + cpu_map_idx, count, &rsd); else if (perf_stat_evsel__is(counter, TOPDOWN_RECOVERY_BUBBLES)) update_runtime_stat(st, STAT_TOPDOWN_RECOVERY_BUBBLES, - cpu, count, &rsd); + cpu_map_idx, count, &rsd); else if (perf_stat_evsel__is(counter, TOPDOWN_RETIRING)) update_runtime_stat(st, STAT_TOPDOWN_RETIRING, - cpu, count, &rsd); + cpu_map_idx, count, &rsd); else if (perf_stat_evsel__is(counter, TOPDOWN_BAD_SPEC)) update_runtime_stat(st, STAT_TOPDOWN_BAD_SPEC, - cpu, count, &rsd); + cpu_map_idx, count, &rsd); else if (perf_stat_evsel__is(counter, TOPDOWN_FE_BOUND)) update_runtime_stat(st, STAT_TOPDOWN_FE_BOUND, - cpu, count, &rsd); + cpu_map_idx, count, &rsd); else if (perf_stat_evsel__is(counter, TOPDOWN_BE_BOUND)) update_runtime_stat(st, STAT_TOPDOWN_BE_BOUND, - cpu, count, &rsd); + cpu_map_idx, count, &rsd); else if (perf_stat_evsel__is(counter, TOPDOWN_HEAVY_OPS)) update_runtime_stat(st, STAT_TOPDOWN_HEAVY_OPS, - cpu, count, &rsd); + cpu_map_idx, count, &rsd); else if (perf_stat_evsel__is(counter, TOPDOWN_BR_MISPREDICT)) update_runtime_stat(st, STAT_TOPDOWN_BR_MISPREDICT, - cpu, count, &rsd); + cpu_map_idx, count, &rsd); else if (perf_stat_evsel__is(counter, TOPDOWN_FETCH_LAT)) update_runtime_stat(st, STAT_TOPDOWN_FETCH_LAT, - cpu, count, &rsd); + cpu_map_idx, count, &rsd); else if (perf_stat_evsel__is(counter, TOPDOWN_MEM_BOUND)) update_runtime_stat(st, STAT_TOPDOWN_MEM_BOUND, - cpu, count, &rsd); + cpu_map_idx, count, &rsd); else if (evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_FRONTEND)) update_runtime_stat(st, STAT_STALLED_CYCLES_FRONT, - cpu, count, &rsd); + cpu_map_idx, count, &rsd); else if (evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_BACKEND)) update_runtime_stat(st, STAT_STALLED_CYCLES_BACK, - cpu, count, &rsd); + cpu_map_idx, count, &rsd); else if (evsel__match(counter, HARDWARE, HW_BRANCH_INSTRUCTIONS)) - update_runtime_stat(st, STAT_BRANCHES, cpu, count, &rsd); + update_runtime_stat(st, STAT_BRANCHES, cpu_map_idx, count, &rsd); else if (evsel__match(counter, HARDWARE, HW_CACHE_REFERENCES)) - update_runtime_stat(st, STAT_CACHEREFS, cpu, count, &rsd); + update_runtime_stat(st, STAT_CACHEREFS, cpu_map_idx, count, &rsd); else if (evsel__match(counter, HW_CACHE, HW_CACHE_L1D)) - update_runtime_stat(st, STAT_L1_DCACHE, cpu, count, &rsd); + update_runtime_stat(st, STAT_L1_DCACHE, cpu_map_idx, count, &rsd); else if (evsel__match(counter, HW_CACHE, HW_CACHE_L1I)) - update_runtime_stat(st, STAT_L1_ICACHE, cpu, count, &rsd); + update_runtime_stat(st, STAT_L1_ICACHE, cpu_map_idx, count, &rsd); else if (evsel__match(counter, HW_CACHE, HW_CACHE_LL)) - update_runtime_stat(st, STAT_LL_CACHE, cpu, count, &rsd); + update_runtime_stat(st, STAT_LL_CACHE, cpu_map_idx, count, &rsd); else if (evsel__match(counter, HW_CACHE, HW_CACHE_DTLB)) - update_runtime_stat(st, STAT_DTLB_CACHE, cpu, count, &rsd); + update_runtime_stat(st, STAT_DTLB_CACHE, cpu_map_idx, count, &rsd); else if (evsel__match(counter, HW_CACHE, HW_CACHE_ITLB)) - update_runtime_stat(st, STAT_ITLB_CACHE, cpu, count, &rsd); + update_runtime_stat(st, STAT_ITLB_CACHE, cpu_map_idx, count, &rsd); else if (perf_stat_evsel__is(counter, SMI_NUM)) - update_runtime_stat(st, STAT_SMI_NUM, cpu, count, &rsd); + update_runtime_stat(st, STAT_SMI_NUM, cpu_map_idx, count, &rsd); else if (perf_stat_evsel__is(counter, APERF)) - update_runtime_stat(st, STAT_APERF, cpu, count, &rsd); + update_runtime_stat(st, STAT_APERF, cpu_map_idx, count, &rsd); if (counter->collect_stat) { - v = saved_value_lookup(counter, cpu, true, STAT_NONE, 0, st, + v = saved_value_lookup(counter, cpu_map_idx, true, STAT_NONE, 0, st, rsd.cgrp); update_stats(&v->stats, count); if (counter->metric_leader) v->metric_total += count; } else if (counter->metric_leader) { v = saved_value_lookup(counter->metric_leader, - cpu, true, STAT_NONE, 0, st, rsd.cgrp); + cpu_map_idx, true, STAT_NONE, 0, st, rsd.cgrp); v->metric_total += count; v->metric_other++; } @@ -464,12 +464,12 @@ void perf_stat__collect_metric_expr(struct evlist *evsel_list) } static double runtime_stat_avg(struct runtime_stat *st, - enum stat_type type, int cpu, + enum stat_type type, int cpu_map_idx, struct runtime_stat_data *rsd) { struct saved_value *v; - v = saved_value_lookup(NULL, cpu, false, type, rsd->ctx, st, rsd->cgrp); + v = saved_value_lookup(NULL, cpu_map_idx, false, type, rsd->ctx, st, rsd->cgrp); if (!v) return 0.0; @@ -477,12 +477,12 @@ static double runtime_stat_avg(struct runtime_stat *st, } static double runtime_stat_n(struct runtime_stat *st, - enum stat_type type, int cpu, + enum stat_type type, int cpu_map_idx, struct runtime_stat_data *rsd) { struct saved_value *v; - v = saved_value_lookup(NULL, cpu, false, type, rsd->ctx, st, rsd->cgrp); + v = saved_value_lookup(NULL, cpu_map_idx, false, type, rsd->ctx, st, rsd->cgrp); if (!v) return 0.0; @@ -490,7 +490,7 @@ static double runtime_stat_n(struct runtime_stat *st, } static void print_stalled_cycles_frontend(struct perf_stat_config *config, - int cpu, double avg, + int cpu_map_idx, double avg, struct perf_stat_output_ctx *out, struct runtime_stat *st, struct runtime_stat_data *rsd) @@ -498,7 +498,7 @@ static void print_stalled_cycles_frontend(struct perf_stat_config *config, double total, ratio = 0.0; const char *color; - total = runtime_stat_avg(st, STAT_CYCLES, cpu, rsd); + total = runtime_stat_avg(st, STAT_CYCLES, cpu_map_idx, rsd); if (total) ratio = avg / total * 100.0; @@ -513,7 +513,7 @@ static void print_stalled_cycles_frontend(struct perf_stat_config *config, } static void print_stalled_cycles_backend(struct perf_stat_config *config, - int cpu, double avg, + int cpu_map_idx, double avg, struct perf_stat_output_ctx *out, struct runtime_stat *st, struct runtime_stat_data *rsd) @@ -521,7 +521,7 @@ static void print_stalled_cycles_backend(struct perf_stat_config *config, double total, ratio = 0.0; const char *color; - total = runtime_stat_avg(st, STAT_CYCLES, cpu, rsd); + total = runtime_stat_avg(st, STAT_CYCLES, cpu_map_idx, rsd); if (total) ratio = avg / total * 100.0; @@ -532,7 +532,7 @@ static void print_stalled_cycles_backend(struct perf_stat_config *config, } static void print_branch_misses(struct perf_stat_config *config, - int cpu, double avg, + int cpu_map_idx, double avg, struct perf_stat_output_ctx *out, struct runtime_stat *st, struct runtime_stat_data *rsd) @@ -540,7 +540,7 @@ static void print_branch_misses(struct perf_stat_config *config, double total, ratio = 0.0; const char *color; - total = runtime_stat_avg(st, STAT_BRANCHES, cpu, rsd); + total = runtime_stat_avg(st, STAT_BRANCHES, cpu_map_idx, rsd); if (total) ratio = avg / total * 100.0; @@ -551,7 +551,7 @@ static void print_branch_misses(struct perf_stat_config *config, } static void print_l1_dcache_misses(struct perf_stat_config *config, - int cpu, double avg, + int cpu_map_idx, double avg, struct perf_stat_output_ctx *out, struct runtime_stat *st, struct runtime_stat_data *rsd) @@ -559,7 +559,7 @@ static void print_l1_dcache_misses(struct perf_stat_config *config, double total, ratio = 0.0; const char *color; - total = runtime_stat_avg(st, STAT_L1_DCACHE, cpu, rsd); + total = runtime_stat_avg(st, STAT_L1_DCACHE, cpu_map_idx, rsd); if (total) ratio = avg / total * 100.0; @@ -570,7 +570,7 @@ static void print_l1_dcache_misses(struct perf_stat_config *config, } static void print_l1_icache_misses(struct perf_stat_config *config, - int cpu, double avg, + int cpu_map_idx, double avg, struct perf_stat_output_ctx *out, struct runtime_stat *st, struct runtime_stat_data *rsd) @@ -578,7 +578,7 @@ static void print_l1_icache_misses(struct perf_stat_config *config, double total, ratio = 0.0; const char *color; - total = runtime_stat_avg(st, STAT_L1_ICACHE, cpu, rsd); + total = runtime_stat_avg(st, STAT_L1_ICACHE, cpu_map_idx, rsd); if (total) ratio = avg / total * 100.0; @@ -588,7 +588,7 @@ static void print_l1_icache_misses(struct perf_stat_config *config, } static void print_dtlb_cache_misses(struct perf_stat_config *config, - int cpu, double avg, + int cpu_map_idx, double avg, struct perf_stat_output_ctx *out, struct runtime_stat *st, struct runtime_stat_data *rsd) @@ -596,7 +596,7 @@ static void print_dtlb_cache_misses(struct perf_stat_config *config, double total, ratio = 0.0; const char *color; - total = runtime_stat_avg(st, STAT_DTLB_CACHE, cpu, rsd); + total = runtime_stat_avg(st, STAT_DTLB_CACHE, cpu_map_idx, rsd); if (total) ratio = avg / total * 100.0; @@ -606,7 +606,7 @@ static void print_dtlb_cache_misses(struct perf_stat_config *config, } static void print_itlb_cache_misses(struct perf_stat_config *config, - int cpu, double avg, + int cpu_map_idx, double avg, struct perf_stat_output_ctx *out, struct runtime_stat *st, struct runtime_stat_data *rsd) @@ -614,7 +614,7 @@ static void print_itlb_cache_misses(struct perf_stat_config *config, double total, ratio = 0.0; const char *color; - total = runtime_stat_avg(st, STAT_ITLB_CACHE, cpu, rsd); + total = runtime_stat_avg(st, STAT_ITLB_CACHE, cpu_map_idx, rsd); if (total) ratio = avg / total * 100.0; @@ -624,7 +624,7 @@ static void print_itlb_cache_misses(struct perf_stat_config *config, } static void print_ll_cache_misses(struct perf_stat_config *config, - int cpu, double avg, + int cpu_map_idx, double avg, struct perf_stat_output_ctx *out, struct runtime_stat *st, struct runtime_stat_data *rsd) @@ -632,7 +632,7 @@ static void print_ll_cache_misses(struct perf_stat_config *config, double total, ratio = 0.0; const char *color; - total = runtime_stat_avg(st, STAT_LL_CACHE, cpu, rsd); + total = runtime_stat_avg(st, STAT_LL_CACHE, cpu_map_idx, rsd); if (total) ratio = avg / total * 100.0; @@ -690,61 +690,61 @@ static double sanitize_val(double x) return x; } -static double td_total_slots(int cpu, struct runtime_stat *st, +static double td_total_slots(int cpu_map_idx, struct runtime_stat *st, struct runtime_stat_data *rsd) { - return runtime_stat_avg(st, STAT_TOPDOWN_TOTAL_SLOTS, cpu, rsd); + return runtime_stat_avg(st, STAT_TOPDOWN_TOTAL_SLOTS, cpu_map_idx, rsd); } -static double td_bad_spec(int cpu, struct runtime_stat *st, +static double td_bad_spec(int cpu_map_idx, struct runtime_stat *st, struct runtime_stat_data *rsd) { double bad_spec = 0; double total_slots; double total; - total = runtime_stat_avg(st, STAT_TOPDOWN_SLOTS_ISSUED, cpu, rsd) - - runtime_stat_avg(st, STAT_TOPDOWN_SLOTS_RETIRED, cpu, rsd) + - runtime_stat_avg(st, STAT_TOPDOWN_RECOVERY_BUBBLES, cpu, rsd); + total = runtime_stat_avg(st, STAT_TOPDOWN_SLOTS_ISSUED, cpu_map_idx, rsd) - + runtime_stat_avg(st, STAT_TOPDOWN_SLOTS_RETIRED, cpu_map_idx, rsd) + + runtime_stat_avg(st, STAT_TOPDOWN_RECOVERY_BUBBLES, cpu_map_idx, rsd); - total_slots = td_total_slots(cpu, st, rsd); + total_slots = td_total_slots(cpu_map_idx, st, rsd); if (total_slots) bad_spec = total / total_slots; return sanitize_val(bad_spec); } -static double td_retiring(int cpu, struct runtime_stat *st, +static double td_retiring(int cpu_map_idx, struct runtime_stat *st, struct runtime_stat_data *rsd) { double retiring = 0; - double total_slots = td_total_slots(cpu, st, rsd); + double total_slots = td_total_slots(cpu_map_idx, st, rsd); double ret_slots = runtime_stat_avg(st, STAT_TOPDOWN_SLOTS_RETIRED, - cpu, rsd); + cpu_map_idx, rsd); if (total_slots) retiring = ret_slots / total_slots; return retiring; } -static double td_fe_bound(int cpu, struct runtime_stat *st, +static double td_fe_bound(int cpu_map_idx, struct runtime_stat *st, struct runtime_stat_data *rsd) { double fe_bound = 0; - double total_slots = td_total_slots(cpu, st, rsd); + double total_slots = td_total_slots(cpu_map_idx, st, rsd); double fetch_bub = runtime_stat_avg(st, STAT_TOPDOWN_FETCH_BUBBLES, - cpu, rsd); + cpu_map_idx, rsd); if (total_slots) fe_bound = fetch_bub / total_slots; return fe_bound; } -static double td_be_bound(int cpu, struct runtime_stat *st, +static double td_be_bound(int cpu_map_idx, struct runtime_stat *st, struct runtime_stat_data *rsd) { - double sum = (td_fe_bound(cpu, st, rsd) + - td_bad_spec(cpu, st, rsd) + - td_retiring(cpu, st, rsd)); + double sum = (td_fe_bound(cpu_map_idx, st, rsd) + + td_bad_spec(cpu_map_idx, st, rsd) + + td_retiring(cpu_map_idx, st, rsd)); if (sum == 0) return 0; return sanitize_val(1.0 - sum); @@ -755,15 +755,15 @@ static double td_be_bound(int cpu, struct runtime_stat *st, * the ratios we need to recreate the sum. */ -static double td_metric_ratio(int cpu, enum stat_type type, +static double td_metric_ratio(int cpu_map_idx, enum stat_type type, struct runtime_stat *stat, struct runtime_stat_data *rsd) { - double sum = runtime_stat_avg(stat, STAT_TOPDOWN_RETIRING, cpu, rsd) + - runtime_stat_avg(stat, STAT_TOPDOWN_FE_BOUND, cpu, rsd) + - runtime_stat_avg(stat, STAT_TOPDOWN_BE_BOUND, cpu, rsd) + - runtime_stat_avg(stat, STAT_TOPDOWN_BAD_SPEC, cpu, rsd); - double d = runtime_stat_avg(stat, type, cpu, rsd); + double sum = runtime_stat_avg(stat, STAT_TOPDOWN_RETIRING, cpu_map_idx, rsd) + + runtime_stat_avg(stat, STAT_TOPDOWN_FE_BOUND, cpu_map_idx, rsd) + + runtime_stat_avg(stat, STAT_TOPDOWN_BE_BOUND, cpu_map_idx, rsd) + + runtime_stat_avg(stat, STAT_TOPDOWN_BAD_SPEC, cpu_map_idx, rsd); + double d = runtime_stat_avg(stat, type, cpu_map_idx, rsd); if (sum) return d / sum; @@ -775,23 +775,23 @@ static double td_metric_ratio(int cpu, enum stat_type type, * We allow two missing. */ -static bool full_td(int cpu, struct runtime_stat *stat, +static bool full_td(int cpu_map_idx, struct runtime_stat *stat, struct runtime_stat_data *rsd) { int c = 0; - if (runtime_stat_avg(stat, STAT_TOPDOWN_RETIRING, cpu, rsd) > 0) + if (runtime_stat_avg(stat, STAT_TOPDOWN_RETIRING, cpu_map_idx, rsd) > 0) c++; - if (runtime_stat_avg(stat, STAT_TOPDOWN_BE_BOUND, cpu, rsd) > 0) + if (runtime_stat_avg(stat, STAT_TOPDOWN_BE_BOUND, cpu_map_idx, rsd) > 0) c++; - if (runtime_stat_avg(stat, STAT_TOPDOWN_FE_BOUND, cpu, rsd) > 0) + if (runtime_stat_avg(stat, STAT_TOPDOWN_FE_BOUND, cpu_map_idx, rsd) > 0) c++; - if (runtime_stat_avg(stat, STAT_TOPDOWN_BAD_SPEC, cpu, rsd) > 0) + if (runtime_stat_avg(stat, STAT_TOPDOWN_BAD_SPEC, cpu_map_idx, rsd) > 0) c++; return c >= 2; } -static void print_smi_cost(struct perf_stat_config *config, int cpu, +static void print_smi_cost(struct perf_stat_config *config, int cpu_map_idx, struct perf_stat_output_ctx *out, struct runtime_stat *st, struct runtime_stat_data *rsd) @@ -799,9 +799,9 @@ static void print_smi_cost(struct perf_stat_config *config, int cpu, double smi_num, aperf, cycles, cost = 0.0; const char *color = NULL; - smi_num = runtime_stat_avg(st, STAT_SMI_NUM, cpu, rsd); - aperf = runtime_stat_avg(st, STAT_APERF, cpu, rsd); - cycles = runtime_stat_avg(st, STAT_CYCLES, cpu, rsd); + smi_num = runtime_stat_avg(st, STAT_SMI_NUM, cpu_map_idx, rsd); + aperf = runtime_stat_avg(st, STAT_APERF, cpu_map_idx, rsd); + cycles = runtime_stat_avg(st, STAT_CYCLES, cpu_map_idx, rsd); if ((cycles == 0) || (aperf == 0)) return; @@ -818,7 +818,7 @@ static void print_smi_cost(struct perf_stat_config *config, int cpu, static int prepare_metric(struct evsel **metric_events, struct metric_ref *metric_refs, struct expr_parse_ctx *pctx, - int cpu, + int cpu_map_idx, struct runtime_stat *st) { double scale; @@ -836,7 +836,7 @@ static int prepare_metric(struct evsel **metric_events, scale = 1e-9; source_count = 1; } else { - v = saved_value_lookup(metric_events[i], cpu, false, + v = saved_value_lookup(metric_events[i], cpu_map_idx, false, STAT_NONE, 0, st, metric_events[i]->cgrp); if (!v) @@ -874,7 +874,7 @@ static void generic_metric(struct perf_stat_config *config, const char *metric_name, const char *metric_unit, int runtime, - int cpu, + int cpu_map_idx, struct perf_stat_output_ctx *out, struct runtime_stat *st) { @@ -889,7 +889,7 @@ static void generic_metric(struct perf_stat_config *config, return; pctx->runtime = runtime; - i = prepare_metric(metric_events, metric_refs, pctx, cpu, st); + i = prepare_metric(metric_events, metric_refs, pctx, cpu_map_idx, st); if (i < 0) { expr__ctx_free(pctx); return; @@ -934,7 +934,7 @@ static void generic_metric(struct perf_stat_config *config, expr__ctx_free(pctx); } -double test_generic_metric(struct metric_expr *mexp, int cpu, struct runtime_stat *st) +double test_generic_metric(struct metric_expr *mexp, int cpu_map_idx, struct runtime_stat *st) { struct expr_parse_ctx *pctx; double ratio = 0.0; @@ -943,7 +943,7 @@ double test_generic_metric(struct metric_expr *mexp, int cpu, struct runtime_sta if (!pctx) return NAN; - if (prepare_metric(mexp->metric_events, mexp->metric_refs, pctx, cpu, st) < 0) + if (prepare_metric(mexp->metric_events, mexp->metric_refs, pctx, cpu_map_idx, st) < 0) goto out; if (expr__parse(&ratio, pctx, mexp->metric_expr)) @@ -956,7 +956,7 @@ out: void perf_stat__print_shadow_stats(struct perf_stat_config *config, struct evsel *evsel, - double avg, int cpu, + double avg, int cpu_map_idx, struct perf_stat_output_ctx *out, struct rblist *metric_events, struct runtime_stat *st) @@ -975,7 +975,7 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config, if (config->iostat_run) { iostat_print_metric(config, evsel, out); } else if (evsel__match(evsel, HARDWARE, HW_INSTRUCTIONS)) { - total = runtime_stat_avg(st, STAT_CYCLES, cpu, &rsd); + total = runtime_stat_avg(st, STAT_CYCLES, cpu_map_idx, &rsd); if (total) { ratio = avg / total; @@ -985,11 +985,11 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config, print_metric(config, ctxp, NULL, NULL, "insn per cycle", 0); } - total = runtime_stat_avg(st, STAT_STALLED_CYCLES_FRONT, cpu, &rsd); + total = runtime_stat_avg(st, STAT_STALLED_CYCLES_FRONT, cpu_map_idx, &rsd); total = max(total, runtime_stat_avg(st, STAT_STALLED_CYCLES_BACK, - cpu, &rsd)); + cpu_map_idx, &rsd)); if (total && avg) { out->new_line(config, ctxp); @@ -999,8 +999,8 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config, ratio); } } else if (evsel__match(evsel, HARDWARE, HW_BRANCH_MISSES)) { - if (runtime_stat_n(st, STAT_BRANCHES, cpu, &rsd) != 0) - print_branch_misses(config, cpu, avg, out, st, &rsd); + if (runtime_stat_n(st, STAT_BRANCHES, cpu_map_idx, &rsd) != 0) + print_branch_misses(config, cpu_map_idx, avg, out, st, &rsd); else print_metric(config, ctxp, NULL, NULL, "of all branches", 0); } else if ( @@ -1009,8 +1009,8 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config, ((PERF_COUNT_HW_CACHE_OP_READ) << 8) | ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) { - if (runtime_stat_n(st, STAT_L1_DCACHE, cpu, &rsd) != 0) - print_l1_dcache_misses(config, cpu, avg, out, st, &rsd); + if (runtime_stat_n(st, STAT_L1_DCACHE, cpu_map_idx, &rsd) != 0) + print_l1_dcache_misses(config, cpu_map_idx, avg, out, st, &rsd); else print_metric(config, ctxp, NULL, NULL, "of all L1-dcache accesses", 0); } else if ( @@ -1019,8 +1019,8 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config, ((PERF_COUNT_HW_CACHE_OP_READ) << 8) | ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) { - if (runtime_stat_n(st, STAT_L1_ICACHE, cpu, &rsd) != 0) - print_l1_icache_misses(config, cpu, avg, out, st, &rsd); + if (runtime_stat_n(st, STAT_L1_ICACHE, cpu_map_idx, &rsd) != 0) + print_l1_icache_misses(config, cpu_map_idx, avg, out, st, &rsd); else print_metric(config, ctxp, NULL, NULL, "of all L1-icache accesses", 0); } else if ( @@ -1029,8 +1029,8 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config, ((PERF_COUNT_HW_CACHE_OP_READ) << 8) | ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) { - if (runtime_stat_n(st, STAT_DTLB_CACHE, cpu, &rsd) != 0) - print_dtlb_cache_misses(config, cpu, avg, out, st, &rsd); + if (runtime_stat_n(st, STAT_DTLB_CACHE, cpu_map_idx, &rsd) != 0) + print_dtlb_cache_misses(config, cpu_map_idx, avg, out, st, &rsd); else print_metric(config, ctxp, NULL, NULL, "of all dTLB cache accesses", 0); } else if ( @@ -1039,8 +1039,8 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config, ((PERF_COUNT_HW_CACHE_OP_READ) << 8) | ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) { - if (runtime_stat_n(st, STAT_ITLB_CACHE, cpu, &rsd) != 0) - print_itlb_cache_misses(config, cpu, avg, out, st, &rsd); + if (runtime_stat_n(st, STAT_ITLB_CACHE, cpu_map_idx, &rsd) != 0) + print_itlb_cache_misses(config, cpu_map_idx, avg, out, st, &rsd); else print_metric(config, ctxp, NULL, NULL, "of all iTLB cache accesses", 0); } else if ( @@ -1049,27 +1049,27 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config, ((PERF_COUNT_HW_CACHE_OP_READ) << 8) | ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) { - if (runtime_stat_n(st, STAT_LL_CACHE, cpu, &rsd) != 0) - print_ll_cache_misses(config, cpu, avg, out, st, &rsd); + if (runtime_stat_n(st, STAT_LL_CACHE, cpu_map_idx, &rsd) != 0) + print_ll_cache_misses(config, cpu_map_idx, avg, out, st, &rsd); else print_metric(config, ctxp, NULL, NULL, "of all LL-cache accesses", 0); } else if (evsel__match(evsel, HARDWARE, HW_CACHE_MISSES)) { - total = runtime_stat_avg(st, STAT_CACHEREFS, cpu, &rsd); + total = runtime_stat_avg(st, STAT_CACHEREFS, cpu_map_idx, &rsd); if (total) ratio = avg * 100 / total; - if (runtime_stat_n(st, STAT_CACHEREFS, cpu, &rsd) != 0) + if (runtime_stat_n(st, STAT_CACHEREFS, cpu_map_idx, &rsd) != 0) print_metric(config, ctxp, NULL, "%8.3f %%", "of all cache refs", ratio); else print_metric(config, ctxp, NULL, NULL, "of all cache refs", 0); } else if (evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_FRONTEND)) { - print_stalled_cycles_frontend(config, cpu, avg, out, st, &rsd); + print_stalled_cycles_frontend(config, cpu_map_idx, avg, out, st, &rsd); } else if (evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_BACKEND)) { - print_stalled_cycles_backend(config, cpu, avg, out, st, &rsd); + print_stalled_cycles_backend(config, cpu_map_idx, avg, out, st, &rsd); } else if (evsel__match(evsel, HARDWARE, HW_CPU_CYCLES)) { - total = runtime_stat_avg(st, STAT_NSECS, cpu, &rsd); + total = runtime_stat_avg(st, STAT_NSECS, cpu_map_idx, &rsd); if (total) { ratio = avg / total; @@ -1078,7 +1078,7 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config, print_metric(config, ctxp, NULL, NULL, "Ghz", 0); } } else if (perf_stat_evsel__is(evsel, CYCLES_IN_TX)) { - total = runtime_stat_avg(st, STAT_CYCLES, cpu, &rsd); + total = runtime_stat_avg(st, STAT_CYCLES, cpu_map_idx, &rsd); if (total) print_metric(config, ctxp, NULL, @@ -1088,8 +1088,8 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config, print_metric(config, ctxp, NULL, NULL, "transactional cycles", 0); } else if (perf_stat_evsel__is(evsel, CYCLES_IN_TX_CP)) { - total = runtime_stat_avg(st, STAT_CYCLES, cpu, &rsd); - total2 = runtime_stat_avg(st, STAT_CYCLES_IN_TX, cpu, &rsd); + total = runtime_stat_avg(st, STAT_CYCLES, cpu_map_idx, &rsd); + total2 = runtime_stat_avg(st, STAT_CYCLES_IN_TX, cpu_map_idx, &rsd); if (total2 < avg) total2 = avg; @@ -1099,19 +1099,19 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config, else print_metric(config, ctxp, NULL, NULL, "aborted cycles", 0); } else if (perf_stat_evsel__is(evsel, TRANSACTION_START)) { - total = runtime_stat_avg(st, STAT_CYCLES_IN_TX, cpu, &rsd); + total = runtime_stat_avg(st, STAT_CYCLES_IN_TX, cpu_map_idx, &rsd); if (avg) ratio = total / avg; - if (runtime_stat_n(st, STAT_CYCLES_IN_TX, cpu, &rsd) != 0) + if (runtime_stat_n(st, STAT_CYCLES_IN_TX, cpu_map_idx, &rsd) != 0) print_metric(config, ctxp, NULL, "%8.0f", "cycles / transaction", ratio); else print_metric(config, ctxp, NULL, NULL, "cycles / transaction", 0); } else if (perf_stat_evsel__is(evsel, ELISION_START)) { - total = runtime_stat_avg(st, STAT_CYCLES_IN_TX, cpu, &rsd); + total = runtime_stat_avg(st, STAT_CYCLES_IN_TX, cpu_map_idx, &rsd); if (avg) ratio = total / avg; @@ -1124,28 +1124,28 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config, else print_metric(config, ctxp, NULL, NULL, "CPUs utilized", 0); } else if (perf_stat_evsel__is(evsel, TOPDOWN_FETCH_BUBBLES)) { - double fe_bound = td_fe_bound(cpu, st, &rsd); + double fe_bound = td_fe_bound(cpu_map_idx, st, &rsd); if (fe_bound > 0.2) color = PERF_COLOR_RED; print_metric(config, ctxp, color, "%8.1f%%", "frontend bound", fe_bound * 100.); } else if (perf_stat_evsel__is(evsel, TOPDOWN_SLOTS_RETIRED)) { - double retiring = td_retiring(cpu, st, &rsd); + double retiring = td_retiring(cpu_map_idx, st, &rsd); if (retiring > 0.7) color = PERF_COLOR_GREEN; print_metric(config, ctxp, color, "%8.1f%%", "retiring", retiring * 100.); } else if (perf_stat_evsel__is(evsel, TOPDOWN_RECOVERY_BUBBLES)) { - double bad_spec = td_bad_spec(cpu, st, &rsd); + double bad_spec = td_bad_spec(cpu_map_idx, st, &rsd); if (bad_spec > 0.1) color = PERF_COLOR_RED; print_metric(config, ctxp, color, "%8.1f%%", "bad speculation", bad_spec * 100.); } else if (perf_stat_evsel__is(evsel, TOPDOWN_SLOTS_ISSUED)) { - double be_bound = td_be_bound(cpu, st, &rsd); + double be_bound = td_be_bound(cpu_map_idx, st, &rsd); const char *name = "backend bound"; static int have_recovery_bubbles = -1; @@ -1158,14 +1158,14 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config, if (be_bound > 0.2) color = PERF_COLOR_RED; - if (td_total_slots(cpu, st, &rsd) > 0) + if (td_total_slots(cpu_map_idx, st, &rsd) > 0) print_metric(config, ctxp, color, "%8.1f%%", name, be_bound * 100.); else print_metric(config, ctxp, NULL, NULL, name, 0); } else if (perf_stat_evsel__is(evsel, TOPDOWN_RETIRING) && - full_td(cpu, st, &rsd)) { - double retiring = td_metric_ratio(cpu, + full_td(cpu_map_idx, st, &rsd)) { + double retiring = td_metric_ratio(cpu_map_idx, STAT_TOPDOWN_RETIRING, st, &rsd); if (retiring > 0.7) @@ -1173,8 +1173,8 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config, print_metric(config, ctxp, color, "%8.1f%%", "retiring", retiring * 100.); } else if (perf_stat_evsel__is(evsel, TOPDOWN_FE_BOUND) && - full_td(cpu, st, &rsd)) { - double fe_bound = td_metric_ratio(cpu, + full_td(cpu_map_idx, st, &rsd)) { + double fe_bound = td_metric_ratio(cpu_map_idx, STAT_TOPDOWN_FE_BOUND, st, &rsd); if (fe_bound > 0.2) @@ -1182,8 +1182,8 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config, print_metric(config, ctxp, color, "%8.1f%%", "frontend bound", fe_bound * 100.); } else if (perf_stat_evsel__is(evsel, TOPDOWN_BE_BOUND) && - full_td(cpu, st, &rsd)) { - double be_bound = td_metric_ratio(cpu, + full_td(cpu_map_idx, st, &rsd)) { + double be_bound = td_metric_ratio(cpu_map_idx, STAT_TOPDOWN_BE_BOUND, st, &rsd); if (be_bound > 0.2) @@ -1191,8 +1191,8 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config, print_metric(config, ctxp, color, "%8.1f%%", "backend bound", be_bound * 100.); } else if (perf_stat_evsel__is(evsel, TOPDOWN_BAD_SPEC) && - full_td(cpu, st, &rsd)) { - double bad_spec = td_metric_ratio(cpu, + full_td(cpu_map_idx, st, &rsd)) { + double bad_spec = td_metric_ratio(cpu_map_idx, STAT_TOPDOWN_BAD_SPEC, st, &rsd); if (bad_spec > 0.1) @@ -1200,11 +1200,11 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config, print_metric(config, ctxp, color, "%8.1f%%", "bad speculation", bad_spec * 100.); } else if (perf_stat_evsel__is(evsel, TOPDOWN_HEAVY_OPS) && - full_td(cpu, st, &rsd) && (config->topdown_level > 1)) { - double retiring = td_metric_ratio(cpu, + full_td(cpu_map_idx, st, &rsd) && (config->topdown_level > 1)) { + double retiring = td_metric_ratio(cpu_map_idx, STAT_TOPDOWN_RETIRING, st, &rsd); - double heavy_ops = td_metric_ratio(cpu, + double heavy_ops = td_metric_ratio(cpu_map_idx, STAT_TOPDOWN_HEAVY_OPS, st, &rsd); double light_ops = retiring - heavy_ops; @@ -1220,11 +1220,11 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config, print_metric(config, ctxp, color, "%8.1f%%", "light operations", light_ops * 100.); } else if (perf_stat_evsel__is(evsel, TOPDOWN_BR_MISPREDICT) && - full_td(cpu, st, &rsd) && (config->topdown_level > 1)) { - double bad_spec = td_metric_ratio(cpu, + full_td(cpu_map_idx, st, &rsd) && (config->topdown_level > 1)) { + double bad_spec = td_metric_ratio(cpu_map_idx, STAT_TOPDOWN_BAD_SPEC, st, &rsd); - double br_mis = td_metric_ratio(cpu, + double br_mis = td_metric_ratio(cpu_map_idx, STAT_TOPDOWN_BR_MISPREDICT, st, &rsd); double m_clears = bad_spec - br_mis; @@ -1240,11 +1240,11 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config, print_metric(config, ctxp, color, "%8.1f%%", "machine clears", m_clears * 100.); } else if (perf_stat_evsel__is(evsel, TOPDOWN_FETCH_LAT) && - full_td(cpu, st, &rsd) && (config->topdown_level > 1)) { - double fe_bound = td_metric_ratio(cpu, + full_td(cpu_map_idx, st, &rsd) && (config->topdown_level > 1)) { + double fe_bound = td_metric_ratio(cpu_map_idx, STAT_TOPDOWN_FE_BOUND, st, &rsd); - double fetch_lat = td_metric_ratio(cpu, + double fetch_lat = td_metric_ratio(cpu_map_idx, STAT_TOPDOWN_FETCH_LAT, st, &rsd); double fetch_bw = fe_bound - fetch_lat; @@ -1260,11 +1260,11 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config, print_metric(config, ctxp, color, "%8.1f%%", "fetch bandwidth", fetch_bw * 100.); } else if (perf_stat_evsel__is(evsel, TOPDOWN_MEM_BOUND) && - full_td(cpu, st, &rsd) && (config->topdown_level > 1)) { - double be_bound = td_metric_ratio(cpu, + full_td(cpu_map_idx, st, &rsd) && (config->topdown_level > 1)) { + double be_bound = td_metric_ratio(cpu_map_idx, STAT_TOPDOWN_BE_BOUND, st, &rsd); - double mem_bound = td_metric_ratio(cpu, + double mem_bound = td_metric_ratio(cpu_map_idx, STAT_TOPDOWN_MEM_BOUND, st, &rsd); double core_bound = be_bound - mem_bound; @@ -1281,12 +1281,12 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config, core_bound * 100.); } else if (evsel->metric_expr) { generic_metric(config, evsel->metric_expr, evsel->metric_events, NULL, - evsel->name, evsel->metric_name, NULL, 1, cpu, out, st); - } else if (runtime_stat_n(st, STAT_NSECS, cpu, &rsd) != 0) { + evsel->name, evsel->metric_name, NULL, 1, cpu_map_idx, out, st); + } else if (runtime_stat_n(st, STAT_NSECS, cpu_map_idx, &rsd) != 0) { char unit = ' '; char unit_buf[10] = "/sec"; - total = runtime_stat_avg(st, STAT_NSECS, cpu, &rsd); + total = runtime_stat_avg(st, STAT_NSECS, cpu_map_idx, &rsd); if (total) ratio = convert_unit_double(1000000000.0 * avg / total, &unit); @@ -1294,7 +1294,7 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config, snprintf(unit_buf, sizeof(unit_buf), "%c/sec", unit); print_metric(config, ctxp, NULL, "%8.3f", unit_buf, ratio); } else if (perf_stat_evsel__is(evsel, SMI_NUM)) { - print_smi_cost(config, cpu, out, st, &rsd); + print_smi_cost(config, cpu_map_idx, out, st, &rsd); } else { num = 0; } @@ -1307,7 +1307,7 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config, out->new_line(config, ctxp); generic_metric(config, mexp->metric_expr, mexp->metric_events, mexp->metric_refs, evsel->name, mexp->metric_name, - mexp->metric_unit, mexp->runtime, cpu, out, st); + mexp->metric_unit, mexp->runtime, cpu_map_idx, out, st); } } if (num == 0) diff --git a/tools/perf/util/stat.c b/tools/perf/util/stat.c index 86ab427e87fc..7dbd7c4f3c33 100644 --- a/tools/perf/util/stat.c +++ b/tools/perf/util/stat.c @@ -356,14 +356,14 @@ static int check_per_pkg(struct evsel *counter, struct perf_counts_values *vals, static int process_counter_values(struct perf_stat_config *config, struct evsel *evsel, - int cpu, int thread, + int cpu_map_idx, int thread, struct perf_counts_values *count) { struct perf_counts_values *aggr = &evsel->counts->aggr; static struct perf_counts_values zero; bool skip = false; - if (check_per_pkg(evsel, count, cpu, &skip)) { + if (check_per_pkg(evsel, count, cpu_map_idx, &skip)) { pr_err("failed to read per-pkg counter\n"); return -1; } @@ -379,11 +379,11 @@ process_counter_values(struct perf_stat_config *config, struct evsel *evsel, case AGGR_NODE: case AGGR_NONE: if (!evsel->snapshot) - evsel__compute_deltas(evsel, cpu, thread, count); + evsel__compute_deltas(evsel, cpu_map_idx, thread, count); perf_counts_values__scale(count, config->scale, NULL); if ((config->aggr_mode == AGGR_NONE) && (!evsel->percore)) { perf_stat__update_shadow_stats(evsel, count->val, - cpu, &rt_stat); + cpu_map_idx, &rt_stat); } if (config->aggr_mode == AGGR_THREAD) { @@ -412,15 +412,15 @@ static int process_counter_maps(struct perf_stat_config *config, { int nthreads = perf_thread_map__nr(counter->core.threads); int ncpus = evsel__nr_cpus(counter); - int cpu, thread; + int idx, thread; if (counter->core.system_wide) nthreads = 1; for (thread = 0; thread < nthreads; thread++) { - for (cpu = 0; cpu < ncpus; cpu++) { - if (process_counter_values(config, counter, cpu, thread, - perf_counts(counter->counts, cpu, thread))) + for (idx = 0; idx < ncpus; idx++) { + if (process_counter_values(config, counter, idx, thread, + perf_counts(counter->counts, idx, thread))) return -1; } } diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h index 5e25d53e891b..691c12fd8976 100644 --- a/tools/perf/util/stat.h +++ b/tools/perf/util/stat.h @@ -208,7 +208,7 @@ void perf_stat__init_shadow_stats(void); void perf_stat__reset_shadow_stats(void); void perf_stat__reset_shadow_per_stat(struct runtime_stat *st); void perf_stat__update_shadow_stats(struct evsel *counter, u64 count, - int cpu, struct runtime_stat *st); + int cpu_map_idx, struct runtime_stat *st); struct perf_stat_output_ctx { void *ctx; print_metric_t print_metric; @@ -253,5 +253,5 @@ void evlist__print_counters(struct evlist *evlist, struct perf_stat_config *conf struct target *_target, struct timespec *ts, int argc, const char **argv); struct metric_expr; -double test_generic_metric(struct metric_expr *mexp, int cpu, struct runtime_stat *st); +double test_generic_metric(struct metric_expr *mexp, int cpu_map_idx, struct runtime_stat *st); #endif From 91802e73f77146d69afa0be7eafc983ec84b2bb0 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Tue, 4 Jan 2022 22:13:46 -0800 Subject: [PATCH 375/493] libperf: Sync evsel documentation cpu was renamed cpu_map_idx, for clarity. Signed-off-by: Ian Rogers Cc: Alexander Shishkin Cc: Andi Kleen Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: John Garry Cc: Kajol Jain Cc: Kan Liang Cc: Leo Yan Cc: Mark Rutland Cc: Mathieu Poirier Cc: Mike Leach Cc: Namhyung Kim Cc: Paul Clarke Cc: Peter Zijlstra Cc: Riccardo Mancini Cc: Stephane Eranian Cc: Suzuki Poulouse Cc: Vineet Singh Cc: coresight@lists.linaro.org Cc: linux-arm-kernel@lists.infradead.org Cc: zhengjun.xing@intel.com Link: https://lore.kernel.org/r/20220105061351.120843-44-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/lib/perf/Documentation/libperf.txt | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/tools/lib/perf/Documentation/libperf.txt b/tools/lib/perf/Documentation/libperf.txt index faef9ba3a540..32c5051c24eb 100644 --- a/tools/lib/perf/Documentation/libperf.txt +++ b/tools/lib/perf/Documentation/libperf.txt @@ -136,16 +136,16 @@ SYNOPSIS int perf_evsel__open(struct perf_evsel *evsel, struct perf_cpu_map *cpus, struct perf_thread_map *threads); void perf_evsel__close(struct perf_evsel *evsel); - void perf_evsel__close_cpu(struct perf_evsel *evsel, int cpu); + void perf_evsel__close_cpu(struct perf_evsel *evsel, int cpu_map_idx); int perf_evsel__mmap(struct perf_evsel *evsel, int pages); void perf_evsel__munmap(struct perf_evsel *evsel); - void *perf_evsel__mmap_base(struct perf_evsel *evsel, int cpu, int thread); - int perf_evsel__read(struct perf_evsel *evsel, int cpu, int thread, + void *perf_evsel__mmap_base(struct perf_evsel *evsel, int cpu_map_idx, int thread); + int perf_evsel__read(struct perf_evsel *evsel, int cpu_map_idx, int thread, struct perf_counts_values *count); int perf_evsel__enable(struct perf_evsel *evsel); - int perf_evsel__enable_cpu(struct perf_evsel *evsel, int cpu); + int perf_evsel__enable_cpu(struct perf_evsel *evsel, int cpu_map_idx); int perf_evsel__disable(struct perf_evsel *evsel); - int perf_evsel__disable_cpu(struct perf_evsel *evsel, int cpu); + int perf_evsel__disable_cpu(struct perf_evsel *evsel, int cpu_map_idx); struct perf_cpu_map *perf_evsel__cpus(struct perf_evsel *evsel); struct perf_thread_map *perf_evsel__threads(struct perf_evsel *evsel); struct perf_event_attr *perf_evsel__attr(struct perf_evsel *evsel); From 7263f3498ba8b6e65c1d810ccafec64cd61a6dc1 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Tue, 4 Jan 2022 22:13:47 -0800 Subject: [PATCH 376/493] perf bpf: Rename 'cpu' to 'cpu_map_idx' Synchronize the caller in evsel with the called function. Shorten 3 lines of code in bperf_read by using perf_cpu_map__for_each_cpu(). This code is frequently using variables named cpu as cpu map indices, which doesn't matter as all CPUs are in the CPU map. It is strange in some cases the cpumap is used at all. Committer notes: Found when building with BUILD_BPF_SKEL=1: Remove unused 'num_cpu' variable in bperf__read(). Make 'j' an 'int' as it is used in perf_cpu_map__for_each_cpu() to compare against an 'int' Signed-off-by: Ian Rogers Cc: Alexander Shishkin Cc: Andi Kleen Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: John Garry Cc: Kajol Jain Cc: Kan Liang Cc: Leo Yan Cc: Mark Rutland Cc: Mathieu Poirier Cc: Mike Leach Cc: Namhyung Kim Cc: Paul Clarke Cc: Peter Zijlstra Cc: Riccardo Mancini Cc: Stephane Eranian Cc: Suzuki Poulouse Cc: Vineet Singh Cc: coresight@lists.linaro.org Cc: linux-arm-kernel@lists.infradead.org Cc: zhengjun.xing@intel.com Link: https://lore.kernel.org/r/20220105061351.120843-45-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/bpf_counter.c | 19 +++++++++---------- tools/perf/util/bpf_counter.h | 4 ++-- 2 files changed, 11 insertions(+), 12 deletions(-) diff --git a/tools/perf/util/bpf_counter.c b/tools/perf/util/bpf_counter.c index c17d4a43ce06..80d1a3a31052 100644 --- a/tools/perf/util/bpf_counter.c +++ b/tools/perf/util/bpf_counter.c @@ -265,7 +265,7 @@ static int bpf_program_profiler__read(struct evsel *evsel) return 0; } -static int bpf_program_profiler__install_pe(struct evsel *evsel, int cpu, +static int bpf_program_profiler__install_pe(struct evsel *evsel, int cpu_map_idx, int fd) { struct bpf_prog_profiler_bpf *skel; @@ -277,7 +277,7 @@ static int bpf_program_profiler__install_pe(struct evsel *evsel, int cpu, assert(skel != NULL); ret = bpf_map_update_elem(bpf_map__fd(skel->maps.events), - &cpu, &fd, BPF_ANY); + &cpu_map_idx, &fd, BPF_ANY); if (ret) return ret; } @@ -566,12 +566,12 @@ out: return err; } -static int bperf__install_pe(struct evsel *evsel, int cpu, int fd) +static int bperf__install_pe(struct evsel *evsel, int cpu_map_idx, int fd) { struct bperf_leader_bpf *skel = evsel->leader_skel; return bpf_map_update_elem(bpf_map__fd(skel->maps.events), - &cpu, &fd, BPF_ANY); + &cpu_map_idx, &fd, BPF_ANY); } /* @@ -608,7 +608,8 @@ static int bperf__read(struct evsel *evsel) __u32 num_cpu_bpf = cpu__max_cpu(); struct bpf_perf_event_value values[num_cpu_bpf]; int reading_map_fd, err = 0; - __u32 i, j, num_cpu; + __u32 i; + int j; bperf_sync_counters(evsel); reading_map_fd = bpf_map__fd(skel->maps.accum_readings); @@ -623,9 +624,7 @@ static int bperf__read(struct evsel *evsel) case BPERF_FILTER_GLOBAL: assert(i == 0); - num_cpu = all_cpu_map->nr; - for (j = 0; j < num_cpu; j++) { - cpu = all_cpu_map->map[j]; + perf_cpu_map__for_each_cpu(cpu, j, all_cpu_map) { perf_counts(evsel->counts, cpu, 0)->val = values[cpu].counter; perf_counts(evsel->counts, cpu, 0)->ena = values[cpu].enabled; perf_counts(evsel->counts, cpu, 0)->run = values[cpu].running; @@ -757,11 +756,11 @@ static inline bool bpf_counter_skip(struct evsel *evsel) evsel->follower_skel == NULL; } -int bpf_counter__install_pe(struct evsel *evsel, int cpu, int fd) +int bpf_counter__install_pe(struct evsel *evsel, int cpu_map_idx, int fd) { if (bpf_counter_skip(evsel)) return 0; - return evsel->bpf_counter_ops->install_pe(evsel, cpu, fd); + return evsel->bpf_counter_ops->install_pe(evsel, cpu_map_idx, fd); } int bpf_counter__load(struct evsel *evsel, struct target *target) diff --git a/tools/perf/util/bpf_counter.h b/tools/perf/util/bpf_counter.h index 65ebaa6694fb..4dbf26408b69 100644 --- a/tools/perf/util/bpf_counter.h +++ b/tools/perf/util/bpf_counter.h @@ -16,7 +16,7 @@ typedef int (*bpf_counter_evsel_op)(struct evsel *evsel); typedef int (*bpf_counter_evsel_target_op)(struct evsel *evsel, struct target *target); typedef int (*bpf_counter_evsel_install_pe_op)(struct evsel *evsel, - int cpu, + int cpu_map_idx, int fd); struct bpf_counter_ops { @@ -40,7 +40,7 @@ int bpf_counter__enable(struct evsel *evsel); int bpf_counter__disable(struct evsel *evsel); int bpf_counter__read(struct evsel *evsel); void bpf_counter__destroy(struct evsel *evsel); -int bpf_counter__install_pe(struct evsel *evsel, int cpu, int fd); +int bpf_counter__install_pe(struct evsel *evsel, int cpu_map_idx, int fd); #else /* HAVE_BPF_SKEL */ From 84d2f4f0375d4857f9f9e57a9ad75cbf0f34e108 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Tue, 4 Jan 2022 22:13:48 -0800 Subject: [PATCH 377/493] perf c2c: Use more intention revealing iterator Use perf_cpu_map__for_each_cpu() in setup_nodes. Signed-off-by: Ian Rogers Cc: Alexander Shishkin Cc: Andi Kleen Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: John Garry Cc: Kajol Jain Cc: Kan Liang Cc: Leo Yan Cc: Mark Rutland Cc: Mathieu Poirier Cc: Mike Leach Cc: Namhyung Kim Cc: Paul Clarke Cc: Peter Zijlstra Cc: Riccardo Mancini Cc: Stephane Eranian Cc: Suzuki Poulouse Cc: Vineet Singh Cc: coresight@lists.linaro.org Cc: linux-arm-kernel@lists.infradead.org Cc: zhengjun.xing@intel.com Link: https://lore.kernel.org/r/20220105061351.120843-46-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-c2c.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c index b5c67ef73862..ad1fbeafc93d 100644 --- a/tools/perf/builtin-c2c.c +++ b/tools/perf/builtin-c2c.c @@ -2015,7 +2015,7 @@ static int setup_nodes(struct perf_session *session) { struct numa_node *n; unsigned long **nodes; - int node, cpu; + int node, cpu, idx; int *cpu2node; if (c2c.node_info > 2) @@ -2057,13 +2057,13 @@ static int setup_nodes(struct perf_session *session) if (perf_cpu_map__empty(map)) continue; - for (cpu = 0; cpu < map->nr; cpu++) { - set_bit(map->map[cpu], set); + perf_cpu_map__for_each_cpu(cpu, idx, map) { + set_bit(cpu, set); - if (WARN_ONCE(cpu2node[map->map[cpu]] != -1, "node/cpu topology bug")) + if (WARN_ONCE(cpu2node[cpu] != -1, "node/cpu topology bug")) return -EINVAL; - cpu2node[map->map[cpu]] = node; + cpu2node[cpu] = node; } } From b57af1b4017abff969425dffd6f59ddfdedce8cb Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Tue, 4 Jan 2022 22:13:49 -0800 Subject: [PATCH 378/493] perf script: Fix flipped index and cpu perf_counts are accessed by the densely packed index. Signed-off-by: Ian Rogers Cc: Alexander Shishkin Cc: Andi Kleen Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: John Garry Cc: Kajol Jain Cc: Kan Liang Cc: Leo Yan Cc: Mark Rutland Cc: Mathieu Poirier Cc: Mike Leach Cc: Namhyung Kim Cc: Paul Clarke Cc: Peter Zijlstra Cc: Riccardo Mancini Cc: Stephane Eranian Cc: Suzuki Poulouse Cc: Vineet Singh Cc: coresight@lists.linaro.org Cc: linux-arm-kernel@lists.infradead.org Cc: zhengjun.xing@intel.com Link: https://lore.kernel.org/r/20220105061351.120843-47-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-script.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index f40319144856..bb43529618b3 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -2131,7 +2131,7 @@ static void __process_stat(struct evsel *counter, u64 tstamp) perf_cpu_map__for_each_cpu(cpu, idx, evsel__cpus(counter)) { struct perf_counts_values *counts; - counts = perf_counts(counter->counts, cpu, thread); + counts = perf_counts(counter->counts, idx, thread); printf("%3d %8d %15" PRIu64 " %15" PRIu64 " %15" PRIu64 " %15" PRIu64 " %s\n", cpu, From ce37ab3eb2490aba60ab1a622a4c6c6ee9a7cc66 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Tue, 4 Jan 2022 22:13:50 -0800 Subject: [PATCH 379/493] perf stat: Correct first_shadow_cpu to return index perf_stat__update_shadow_stats() and perf_stat__print_shadow_stats() use a cpu map index rather than a CPU, but first_shadow_cpu is returning the wrong value for this. Change first_shadow_cpu to first_shadow_cpu_map_idx to make things agree. Signed-off-by: Ian Rogers Cc: Alexander Shishkin Cc: Andi Kleen Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: John Garry Cc: Kajol Jain Cc: Kan Liang Cc: Leo Yan Cc: Mark Rutland Cc: Mathieu Poirier Cc: Mike Leach Cc: Namhyung Kim Cc: Paul Clarke Cc: Peter Zijlstra Cc: Riccardo Mancini Cc: Stephane Eranian Cc: Suzuki Poulouse Cc: Vineet Singh Cc: coresight@lists.linaro.org Cc: linux-arm-kernel@lists.infradead.org Cc: zhengjun.xing@intel.com Link: https://lore.kernel.org/r/20220105061351.120843-48-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/stat-display.c | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c index 0f192360b6c6..ba95379efcfb 100644 --- a/tools/perf/util/stat-display.c +++ b/tools/perf/util/stat-display.c @@ -327,24 +327,23 @@ static void print_metric_header(struct perf_stat_config *config, fprintf(os->fh, "%*s ", config->metric_only_len, unit); } -static int first_shadow_cpu(struct perf_stat_config *config, - struct evsel *evsel, const struct aggr_cpu_id *id) +static int first_shadow_cpu_map_idx(struct perf_stat_config *config, + struct evsel *evsel, const struct aggr_cpu_id *id) { - struct perf_cpu_map *cpus; + struct perf_cpu_map *cpus = evsel__cpus(evsel); int cpu, idx; if (config->aggr_mode == AGGR_NONE) - return id->cpu; + return perf_cpu_map__idx(cpus, id->cpu); if (!config->aggr_get_id) return 0; - cpus = evsel__cpus(evsel); perf_cpu_map__for_each_cpu(cpu, idx, cpus) { struct aggr_cpu_id cpu_id = config->aggr_get_id(config, cpu); if (aggr_cpu_id__equal(&cpu_id, id)) - return cpu; + return idx; } return 0; } @@ -503,7 +502,7 @@ static void printout(struct perf_stat_config *config, struct aggr_cpu_id id, int } perf_stat__print_shadow_stats(config, counter, uval, - first_shadow_cpu(config, counter, &id), + first_shadow_cpu_map_idx(config, counter, &id), &out, &config->metric_events, st); if (!config->csv_output && !config->metric_only) { print_noise(config, counter, noise); @@ -532,7 +531,7 @@ static void aggr_update_shadow(struct perf_stat_config *config, val += perf_counts(counter->counts, idx, 0)->val; } perf_stat__update_shadow_stats(counter, val, - first_shadow_cpu(config, counter, &id), + first_shadow_cpu_map_idx(config, counter, &id), &rt_stat); } } From 6d18804b963b78dcd53851f11e9080408b3d85c2 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Tue, 4 Jan 2022 22:13:51 -0800 Subject: [PATCH 380/493] perf cpumap: Give CPUs their own type A common problem is confusing CPU map indices with the CPU, by wrapping the CPU with a struct then this is avoided. This approach is similar to atomic_t. Committer notes: To make it build with BUILD_BPF_SKEL=1 these files needed the conversions to 'struct perf_cpu' usage: tools/perf/util/bpf_counter.c tools/perf/util/bpf_counter_cgroup.c tools/perf/util/bpf_ftrace.c Also perf_env__get_cpu() was removed back in "perf cpumap: Switch cpu_map__build_map to cpu function". Additionally these needed to be fixed for the ARM builds to complete: tools/perf/arch/arm/util/cs-etm.c tools/perf/arch/arm64/util/pmu.c Suggested-by: John Garry Signed-off-by: Ian Rogers Cc: Alexander Shishkin Cc: Andi Kleen Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: Kajol Jain Cc: Kan Liang Cc: Leo Yan Cc: Mark Rutland Cc: Mathieu Poirier Cc: Mike Leach Cc: Namhyung Kim Cc: Paul Clarke Cc: Peter Zijlstra Cc: Riccardo Mancini Cc: Stephane Eranian Cc: Suzuki Poulouse Cc: Vineet Singh Cc: coresight@lists.linaro.org Cc: linux-arm-kernel@lists.infradead.org Cc: zhengjun.xing@intel.com Link: https://lore.kernel.org/r/20220105061351.120843-49-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/lib/perf/cpumap.c | 103 ++++++++++-------- tools/lib/perf/evlist.c | 4 +- tools/lib/perf/evsel.c | 9 +- tools/lib/perf/include/internal/cpumap.h | 9 +- tools/lib/perf/include/internal/evlist.h | 3 +- tools/lib/perf/include/internal/evsel.h | 4 +- tools/lib/perf/include/internal/mmap.h | 5 +- tools/lib/perf/include/perf/cpumap.h | 9 +- tools/lib/perf/mmap.c | 2 +- tools/perf/arch/arm/util/cs-etm.c | 35 ++++-- tools/perf/arch/arm64/util/pmu.c | 2 +- tools/perf/bench/epoll-ctl.c | 2 +- tools/perf/bench/epoll-wait.c | 2 +- tools/perf/bench/futex-hash.c | 2 +- tools/perf/bench/futex-lock-pi.c | 2 +- tools/perf/bench/futex-requeue.c | 2 +- tools/perf/bench/futex-wake-parallel.c | 2 +- tools/perf/bench/futex-wake.c | 2 +- tools/perf/builtin-c2c.c | 13 ++- tools/perf/builtin-ftrace.c | 2 +- tools/perf/builtin-kmem.c | 2 +- tools/perf/builtin-record.c | 2 +- tools/perf/builtin-sched.c | 65 ++++++----- tools/perf/builtin-script.c | 5 +- tools/perf/builtin-stat.c | 67 ++++++------ tools/perf/tests/attr.c | 6 +- tools/perf/tests/bitmap.c | 2 +- tools/perf/tests/cpumap.c | 6 +- tools/perf/tests/event_update.c | 6 +- tools/perf/tests/mem2node.c | 2 +- tools/perf/tests/mmap-basic.c | 4 +- tools/perf/tests/openat-syscall-all-cpus.c | 17 +-- tools/perf/tests/stat.c | 3 +- tools/perf/tests/topology.c | 30 ++--- tools/perf/util/affinity.c | 2 +- tools/perf/util/auxtrace.c | 12 +- tools/perf/util/auxtrace.h | 5 +- tools/perf/util/bpf_counter.c | 12 +- tools/perf/util/bpf_counter_cgroup.c | 10 +- tools/perf/util/bpf_ftrace.c | 4 +- tools/perf/util/cpumap.c | 91 ++++++++-------- tools/perf/util/cpumap.h | 26 ++--- tools/perf/util/cputopo.c | 6 +- tools/perf/util/env.c | 29 ++--- tools/perf/util/env.h | 3 +- tools/perf/util/evlist.c | 8 +- tools/perf/util/evlist.h | 2 +- tools/perf/util/evsel.c | 6 +- tools/perf/util/expr.c | 2 +- tools/perf/util/header.c | 6 +- tools/perf/util/mmap.c | 19 ++-- tools/perf/util/mmap.h | 3 +- tools/perf/util/perf_api_probe.c | 15 ++- tools/perf/util/python.c | 4 +- tools/perf/util/record.c | 11 +- .../scripting-engines/trace-event-python.c | 6 +- tools/perf/util/session.c | 10 +- tools/perf/util/stat-display.c | 34 +++--- tools/perf/util/stat.c | 2 +- tools/perf/util/stat.h | 2 +- tools/perf/util/svghelper.c | 6 +- tools/perf/util/synthetic-events.c | 12 +- tools/perf/util/synthetic-events.h | 3 +- tools/perf/util/util.h | 5 +- 64 files changed, 431 insertions(+), 356 deletions(-) diff --git a/tools/lib/perf/cpumap.c b/tools/lib/perf/cpumap.c index eacea3ab965a..ee66760f1e63 100644 --- a/tools/lib/perf/cpumap.c +++ b/tools/lib/perf/cpumap.c @@ -10,15 +10,24 @@ #include #include -struct perf_cpu_map *perf_cpu_map__dummy_new(void) +static struct perf_cpu_map *perf_cpu_map__alloc(int nr_cpus) { - struct perf_cpu_map *cpus = malloc(sizeof(*cpus) + sizeof(int)); + struct perf_cpu_map *cpus = malloc(sizeof(*cpus) + sizeof(struct perf_cpu) * nr_cpus); if (cpus != NULL) { - cpus->nr = 1; - cpus->map[0] = -1; + cpus->nr = nr_cpus; refcount_set(&cpus->refcnt, 1); + } + return cpus; +} + +struct perf_cpu_map *perf_cpu_map__dummy_new(void) +{ + struct perf_cpu_map *cpus = perf_cpu_map__alloc(1); + + if (cpus) + cpus->map[0].cpu = -1; return cpus; } @@ -54,15 +63,12 @@ static struct perf_cpu_map *cpu_map__default_new(void) if (nr_cpus < 0) return NULL; - cpus = malloc(sizeof(*cpus) + nr_cpus * sizeof(int)); + cpus = perf_cpu_map__alloc(nr_cpus); if (cpus != NULL) { int i; for (i = 0; i < nr_cpus; ++i) - cpus->map[i] = i; - - cpus->nr = nr_cpus; - refcount_set(&cpus->refcnt, 1); + cpus->map[i].cpu = i; } return cpus; @@ -73,31 +79,32 @@ struct perf_cpu_map *perf_cpu_map__default_new(void) return cpu_map__default_new(); } -static int cmp_int(const void *a, const void *b) + +static int cmp_cpu(const void *a, const void *b) { - return *(const int *)a - *(const int*)b; + const struct perf_cpu *cpu_a = a, *cpu_b = b; + + return cpu_a->cpu - cpu_b->cpu; } -static struct perf_cpu_map *cpu_map__trim_new(int nr_cpus, int *tmp_cpus) +static struct perf_cpu_map *cpu_map__trim_new(int nr_cpus, const struct perf_cpu *tmp_cpus) { - size_t payload_size = nr_cpus * sizeof(int); - struct perf_cpu_map *cpus = malloc(sizeof(*cpus) + payload_size); + size_t payload_size = nr_cpus * sizeof(struct perf_cpu); + struct perf_cpu_map *cpus = perf_cpu_map__alloc(nr_cpus); int i, j; if (cpus != NULL) { memcpy(cpus->map, tmp_cpus, payload_size); - qsort(cpus->map, nr_cpus, sizeof(int), cmp_int); + qsort(cpus->map, nr_cpus, sizeof(struct perf_cpu), cmp_cpu); /* Remove dups */ j = 0; for (i = 0; i < nr_cpus; i++) { - if (i == 0 || cpus->map[i] != cpus->map[i - 1]) - cpus->map[j++] = cpus->map[i]; + if (i == 0 || cpus->map[i].cpu != cpus->map[i - 1].cpu) + cpus->map[j++].cpu = cpus->map[i].cpu; } cpus->nr = j; assert(j <= nr_cpus); - refcount_set(&cpus->refcnt, 1); } - return cpus; } @@ -105,7 +112,7 @@ struct perf_cpu_map *perf_cpu_map__read(FILE *file) { struct perf_cpu_map *cpus = NULL; int nr_cpus = 0; - int *tmp_cpus = NULL, *tmp; + struct perf_cpu *tmp_cpus = NULL, *tmp; int max_entries = 0; int n, cpu, prev; char sep; @@ -124,24 +131,24 @@ struct perf_cpu_map *perf_cpu_map__read(FILE *file) if (new_max >= max_entries) { max_entries = new_max + MAX_NR_CPUS / 2; - tmp = realloc(tmp_cpus, max_entries * sizeof(int)); + tmp = realloc(tmp_cpus, max_entries * sizeof(struct perf_cpu)); if (tmp == NULL) goto out_free_tmp; tmp_cpus = tmp; } while (++prev < cpu) - tmp_cpus[nr_cpus++] = prev; + tmp_cpus[nr_cpus++].cpu = prev; } if (nr_cpus == max_entries) { max_entries += MAX_NR_CPUS; - tmp = realloc(tmp_cpus, max_entries * sizeof(int)); + tmp = realloc(tmp_cpus, max_entries * sizeof(struct perf_cpu)); if (tmp == NULL) goto out_free_tmp; tmp_cpus = tmp; } - tmp_cpus[nr_cpus++] = cpu; + tmp_cpus[nr_cpus++].cpu = cpu; if (n == 2 && sep == '-') prev = cpu; else @@ -179,7 +186,7 @@ struct perf_cpu_map *perf_cpu_map__new(const char *cpu_list) unsigned long start_cpu, end_cpu = 0; char *p = NULL; int i, nr_cpus = 0; - int *tmp_cpus = NULL, *tmp; + struct perf_cpu *tmp_cpus = NULL, *tmp; int max_entries = 0; if (!cpu_list) @@ -220,17 +227,17 @@ struct perf_cpu_map *perf_cpu_map__new(const char *cpu_list) for (; start_cpu <= end_cpu; start_cpu++) { /* check for duplicates */ for (i = 0; i < nr_cpus; i++) - if (tmp_cpus[i] == (int)start_cpu) + if (tmp_cpus[i].cpu == (int)start_cpu) goto invalid; if (nr_cpus == max_entries) { max_entries += MAX_NR_CPUS; - tmp = realloc(tmp_cpus, max_entries * sizeof(int)); + tmp = realloc(tmp_cpus, max_entries * sizeof(struct perf_cpu)); if (tmp == NULL) goto invalid; tmp_cpus = tmp; } - tmp_cpus[nr_cpus++] = (int)start_cpu; + tmp_cpus[nr_cpus++].cpu = (int)start_cpu; } if (*p) ++p; @@ -250,12 +257,16 @@ out: return cpus; } -int perf_cpu_map__cpu(const struct perf_cpu_map *cpus, int idx) +struct perf_cpu perf_cpu_map__cpu(const struct perf_cpu_map *cpus, int idx) { + struct perf_cpu result = { + .cpu = -1 + }; + if (cpus && idx < cpus->nr) return cpus->map[idx]; - return -1; + return result; } int perf_cpu_map__nr(const struct perf_cpu_map *cpus) @@ -265,10 +276,10 @@ int perf_cpu_map__nr(const struct perf_cpu_map *cpus) bool perf_cpu_map__empty(const struct perf_cpu_map *map) { - return map ? map->map[0] == -1 : true; + return map ? map->map[0].cpu == -1 : true; } -int perf_cpu_map__idx(const struct perf_cpu_map *cpus, int cpu) +int perf_cpu_map__idx(const struct perf_cpu_map *cpus, struct perf_cpu cpu) { int low, high; @@ -278,13 +289,13 @@ int perf_cpu_map__idx(const struct perf_cpu_map *cpus, int cpu) low = 0; high = cpus->nr; while (low < high) { - int idx = (low + high) / 2, - cpu_at_idx = cpus->map[idx]; + int idx = (low + high) / 2; + struct perf_cpu cpu_at_idx = cpus->map[idx]; - if (cpu_at_idx == cpu) + if (cpu_at_idx.cpu == cpu.cpu) return idx; - if (cpu_at_idx > cpu) + if (cpu_at_idx.cpu > cpu.cpu) high = idx; else low = idx + 1; @@ -293,15 +304,19 @@ int perf_cpu_map__idx(const struct perf_cpu_map *cpus, int cpu) return -1; } -bool perf_cpu_map__has(const struct perf_cpu_map *cpus, int cpu) +bool perf_cpu_map__has(const struct perf_cpu_map *cpus, struct perf_cpu cpu) { return perf_cpu_map__idx(cpus, cpu) != -1; } -int perf_cpu_map__max(struct perf_cpu_map *map) +struct perf_cpu perf_cpu_map__max(struct perf_cpu_map *map) { + struct perf_cpu result = { + .cpu = -1 + }; + // cpu_map__trim_new() qsort()s it, cpu_map__default_new() sorts it as well. - return map->nr > 0 ? map->map[map->nr - 1] : -1; + return map->nr > 0 ? map->map[map->nr - 1] : result; } /* @@ -315,7 +330,7 @@ int perf_cpu_map__max(struct perf_cpu_map *map) struct perf_cpu_map *perf_cpu_map__merge(struct perf_cpu_map *orig, struct perf_cpu_map *other) { - int *tmp_cpus; + struct perf_cpu *tmp_cpus; int tmp_len; int i, j, k; struct perf_cpu_map *merged; @@ -329,19 +344,19 @@ struct perf_cpu_map *perf_cpu_map__merge(struct perf_cpu_map *orig, if (!other) return orig; if (orig->nr == other->nr && - !memcmp(orig->map, other->map, orig->nr * sizeof(int))) + !memcmp(orig->map, other->map, orig->nr * sizeof(struct perf_cpu))) return orig; tmp_len = orig->nr + other->nr; - tmp_cpus = malloc(tmp_len * sizeof(int)); + tmp_cpus = malloc(tmp_len * sizeof(struct perf_cpu)); if (!tmp_cpus) return NULL; /* Standard merge algorithm from wikipedia */ i = j = k = 0; while (i < orig->nr && j < other->nr) { - if (orig->map[i] <= other->map[j]) { - if (orig->map[i] == other->map[j]) + if (orig->map[i].cpu <= other->map[j].cpu) { + if (orig->map[i].cpu == other->map[j].cpu) j++; tmp_cpus[k++] = orig->map[i++]; } else diff --git a/tools/lib/perf/evlist.c b/tools/lib/perf/evlist.c index 245acbc53bd3..9a770bfdc804 100644 --- a/tools/lib/perf/evlist.c +++ b/tools/lib/perf/evlist.c @@ -407,7 +407,7 @@ perf_evlist__mmap_cb_get(struct perf_evlist *evlist, bool overwrite, int idx) static int perf_evlist__mmap_cb_mmap(struct perf_mmap *map, struct perf_mmap_param *mp, - int output, int cpu) + int output, struct perf_cpu cpu) { return perf_mmap__mmap(map, mp, output, cpu); } @@ -426,7 +426,7 @@ mmap_per_evsel(struct perf_evlist *evlist, struct perf_evlist_mmap_ops *ops, int idx, struct perf_mmap_param *mp, int cpu_idx, int thread, int *_output, int *_output_overwrite) { - int evlist_cpu = perf_cpu_map__cpu(evlist->cpus, cpu_idx); + struct perf_cpu evlist_cpu = perf_cpu_map__cpu(evlist->cpus, cpu_idx); struct perf_evsel *evsel; int revent; diff --git a/tools/lib/perf/evsel.c b/tools/lib/perf/evsel.c index f1e1665ef4bd..7ea86a44eae5 100644 --- a/tools/lib/perf/evsel.c +++ b/tools/lib/perf/evsel.c @@ -78,10 +78,10 @@ static int perf_evsel__alloc_mmap(struct perf_evsel *evsel, int ncpus, int nthre static int sys_perf_event_open(struct perf_event_attr *attr, - pid_t pid, int cpu, int group_fd, + pid_t pid, struct perf_cpu cpu, int group_fd, unsigned long flags) { - return syscall(__NR_perf_event_open, attr, pid, cpu, group_fd, flags); + return syscall(__NR_perf_event_open, attr, pid, cpu.cpu, group_fd, flags); } static int get_group_fd(struct perf_evsel *evsel, int cpu_map_idx, int thread, int *group_fd) @@ -113,7 +113,8 @@ static int get_group_fd(struct perf_evsel *evsel, int cpu_map_idx, int thread, i int perf_evsel__open(struct perf_evsel *evsel, struct perf_cpu_map *cpus, struct perf_thread_map *threads) { - int cpu, idx, thread, err = 0; + struct perf_cpu cpu; + int idx, thread, err = 0; if (cpus == NULL) { static struct perf_cpu_map *empty_cpu_map; @@ -252,7 +253,7 @@ int perf_evsel__mmap(struct perf_evsel *evsel, int pages) for (thread = 0; thread < xyarray__max_y(evsel->fd); thread++) { int *fd = FD(evsel, idx, thread); struct perf_mmap *map; - int cpu = perf_cpu_map__cpu(evsel->cpus, idx); + struct perf_cpu cpu = perf_cpu_map__cpu(evsel->cpus, idx); if (fd == NULL || *fd < 0) continue; diff --git a/tools/lib/perf/include/internal/cpumap.h b/tools/lib/perf/include/internal/cpumap.h index 71a31ed738c9..581f9ffb4237 100644 --- a/tools/lib/perf/include/internal/cpumap.h +++ b/tools/lib/perf/include/internal/cpumap.h @@ -4,6 +4,11 @@ #include +/** A wrapper around a CPU to avoid confusion with the perf_cpu_map's map's indices. */ +struct perf_cpu { + int cpu; +}; + /** * A sized, reference counted, sorted array of integers representing CPU * numbers. This is commonly used to capture which CPUs a PMU is associated @@ -16,13 +21,13 @@ struct perf_cpu_map { /** Length of the map array. */ int nr; /** The CPU values. */ - int map[]; + struct perf_cpu map[]; }; #ifndef MAX_NR_CPUS #define MAX_NR_CPUS 2048 #endif -int perf_cpu_map__idx(const struct perf_cpu_map *cpus, int cpu); +int perf_cpu_map__idx(const struct perf_cpu_map *cpus, struct perf_cpu cpu); #endif /* __LIBPERF_INTERNAL_CPUMAP_H */ diff --git a/tools/lib/perf/include/internal/evlist.h b/tools/lib/perf/include/internal/evlist.h index 6f74269a3ad4..4cefade540bd 100644 --- a/tools/lib/perf/include/internal/evlist.h +++ b/tools/lib/perf/include/internal/evlist.h @@ -4,6 +4,7 @@ #include #include +#include #include #define PERF_EVLIST__HLIST_BITS 8 @@ -36,7 +37,7 @@ typedef void typedef struct perf_mmap* (*perf_evlist_mmap__cb_get_t)(struct perf_evlist*, bool, int); typedef int -(*perf_evlist_mmap__cb_mmap_t)(struct perf_mmap*, struct perf_mmap_param*, int, int); +(*perf_evlist_mmap__cb_mmap_t)(struct perf_mmap*, struct perf_mmap_param*, int, struct perf_cpu); struct perf_evlist_mmap_ops { perf_evlist_mmap__cb_idx_t idx; diff --git a/tools/lib/perf/include/internal/evsel.h b/tools/lib/perf/include/internal/evsel.h index 1f3eacbad2e8..cfc9ebd7968e 100644 --- a/tools/lib/perf/include/internal/evsel.h +++ b/tools/lib/perf/include/internal/evsel.h @@ -6,8 +6,8 @@ #include #include #include +#include -struct perf_cpu_map; struct perf_thread_map; struct xyarray; @@ -27,7 +27,7 @@ struct perf_sample_id { * queue number. */ int idx; - int cpu; + struct perf_cpu cpu; pid_t tid; /* Holds total ID period value for PERF_SAMPLE_READ processing. */ diff --git a/tools/lib/perf/include/internal/mmap.h b/tools/lib/perf/include/internal/mmap.h index 5e3422f40ed5..5a062af8e9d8 100644 --- a/tools/lib/perf/include/internal/mmap.h +++ b/tools/lib/perf/include/internal/mmap.h @@ -6,6 +6,7 @@ #include #include #include +#include /* perf sample has 16 bits size limit */ #define PERF_SAMPLE_MAX_SIZE (1 << 16) @@ -24,7 +25,7 @@ struct perf_mmap { void *base; int mask; int fd; - int cpu; + struct perf_cpu cpu; refcount_t refcnt; u64 prev; u64 start; @@ -46,7 +47,7 @@ size_t perf_mmap__mmap_len(struct perf_mmap *map); void perf_mmap__init(struct perf_mmap *map, struct perf_mmap *prev, bool overwrite, libperf_unmap_cb_t unmap_cb); int perf_mmap__mmap(struct perf_mmap *map, struct perf_mmap_param *mp, - int fd, int cpu); + int fd, struct perf_cpu cpu); void perf_mmap__munmap(struct perf_mmap *map); void perf_mmap__get(struct perf_mmap *map); void perf_mmap__put(struct perf_mmap *map); diff --git a/tools/lib/perf/include/perf/cpumap.h b/tools/lib/perf/include/perf/cpumap.h index 3f1c0afa3ccd..15b8faafd615 100644 --- a/tools/lib/perf/include/perf/cpumap.h +++ b/tools/lib/perf/include/perf/cpumap.h @@ -3,11 +3,10 @@ #define __LIBPERF_CPUMAP_H #include +#include #include #include -struct perf_cpu_map; - LIBPERF_API struct perf_cpu_map *perf_cpu_map__dummy_new(void); LIBPERF_API struct perf_cpu_map *perf_cpu_map__default_new(void); LIBPERF_API struct perf_cpu_map *perf_cpu_map__new(const char *cpu_list); @@ -16,11 +15,11 @@ LIBPERF_API struct perf_cpu_map *perf_cpu_map__get(struct perf_cpu_map *map); LIBPERF_API struct perf_cpu_map *perf_cpu_map__merge(struct perf_cpu_map *orig, struct perf_cpu_map *other); LIBPERF_API void perf_cpu_map__put(struct perf_cpu_map *map); -LIBPERF_API int perf_cpu_map__cpu(const struct perf_cpu_map *cpus, int idx); +LIBPERF_API struct perf_cpu perf_cpu_map__cpu(const struct perf_cpu_map *cpus, int idx); LIBPERF_API int perf_cpu_map__nr(const struct perf_cpu_map *cpus); LIBPERF_API bool perf_cpu_map__empty(const struct perf_cpu_map *map); -LIBPERF_API int perf_cpu_map__max(struct perf_cpu_map *map); -LIBPERF_API bool perf_cpu_map__has(const struct perf_cpu_map *map, int cpu); +LIBPERF_API struct perf_cpu perf_cpu_map__max(struct perf_cpu_map *map); +LIBPERF_API bool perf_cpu_map__has(const struct perf_cpu_map *map, struct perf_cpu cpu); #define perf_cpu_map__for_each_cpu(cpu, idx, cpus) \ for ((idx) = 0, (cpu) = perf_cpu_map__cpu(cpus, idx); \ diff --git a/tools/lib/perf/mmap.c b/tools/lib/perf/mmap.c index aaa457904008..f7ee07cb5818 100644 --- a/tools/lib/perf/mmap.c +++ b/tools/lib/perf/mmap.c @@ -32,7 +32,7 @@ size_t perf_mmap__mmap_len(struct perf_mmap *map) } int perf_mmap__mmap(struct perf_mmap *map, struct perf_mmap_param *mp, - int fd, int cpu) + int fd, struct perf_cpu cpu) { map->prev = 0; map->mask = mp->mask; diff --git a/tools/perf/arch/arm/util/cs-etm.c b/tools/perf/arch/arm/util/cs-etm.c index 129c0272d65b..2e8b2c4365a0 100644 --- a/tools/perf/arch/arm/util/cs-etm.c +++ b/tools/perf/arch/arm/util/cs-etm.c @@ -203,9 +203,11 @@ static int cs_etm_set_option(struct auxtrace_record *itr, struct perf_cpu_map *online_cpus = perf_cpu_map__new(NULL); /* Set option of each CPU we have */ - for (i = 0; i < cpu__max_cpu(); i++) { - if (!perf_cpu_map__has(event_cpus, i) || - !perf_cpu_map__has(online_cpus, i)) + for (i = 0; i < cpu__max_cpu().cpu; i++) { + struct perf_cpu cpu = { .cpu = i, }; + + if (!perf_cpu_map__has(event_cpus, cpu) || + !perf_cpu_map__has(online_cpus, cpu)) continue; if (option & BIT(ETM_OPT_CTXTID)) { @@ -522,9 +524,11 @@ cs_etm_info_priv_size(struct auxtrace_record *itr __maybe_unused, /* cpu map is not empty, we have specific CPUs to work with */ if (!perf_cpu_map__empty(event_cpus)) { - for (i = 0; i < cpu__max_cpu(); i++) { - if (!perf_cpu_map__has(event_cpus, i) || - !perf_cpu_map__has(online_cpus, i)) + for (i = 0; i < cpu__max_cpu().cpu; i++) { + struct perf_cpu cpu = { .cpu = i, }; + + if (!perf_cpu_map__has(event_cpus, cpu) || + !perf_cpu_map__has(online_cpus, cpu)) continue; if (cs_etm_is_ete(itr, i)) @@ -536,8 +540,10 @@ cs_etm_info_priv_size(struct auxtrace_record *itr __maybe_unused, } } else { /* get configuration for all CPUs in the system */ - for (i = 0; i < cpu__max_cpu(); i++) { - if (!perf_cpu_map__has(online_cpus, i)) + for (i = 0; i < cpu__max_cpu().cpu; i++) { + struct perf_cpu cpu = { .cpu = i, }; + + if (!perf_cpu_map__has(online_cpus, cpu)) continue; if (cs_etm_is_ete(itr, i)) @@ -722,8 +728,10 @@ static int cs_etm_info_fill(struct auxtrace_record *itr, } else { /* Make sure all specified CPUs are online */ for (i = 0; i < perf_cpu_map__nr(event_cpus); i++) { - if (perf_cpu_map__has(event_cpus, i) && - !perf_cpu_map__has(online_cpus, i)) + struct perf_cpu cpu = { .cpu = i, }; + + if (perf_cpu_map__has(event_cpus, cpu) && + !perf_cpu_map__has(online_cpus, cpu)) return -EINVAL; } @@ -743,9 +751,12 @@ static int cs_etm_info_fill(struct auxtrace_record *itr, offset = CS_ETM_SNAPSHOT + 1; - for (i = 0; i < cpu__max_cpu() && offset < priv_size; i++) - if (perf_cpu_map__has(cpu_map, i)) + for (i = 0; i < cpu__max_cpu().cpu && offset < priv_size; i++) { + struct perf_cpu cpu = { .cpu = i, }; + + if (perf_cpu_map__has(cpu_map, cpu)) cs_etm_get_metadata(i, &offset, itr, info); + } perf_cpu_map__put(online_cpus); diff --git a/tools/perf/arch/arm64/util/pmu.c b/tools/perf/arch/arm64/util/pmu.c index d3a18f9c85f6..79124bba713e 100644 --- a/tools/perf/arch/arm64/util/pmu.c +++ b/tools/perf/arch/arm64/util/pmu.c @@ -15,7 +15,7 @@ const struct pmu_events_map *pmu_events_map__find(void) * The cpumap should cover all CPUs. Otherwise, some CPUs may * not support some events or have different event IDs. */ - if (pmu->cpus->nr != cpu__max_cpu()) + if (pmu->cpus->nr != cpu__max_cpu().cpu) return NULL; return perf_pmu__find_map(pmu); diff --git a/tools/perf/bench/epoll-ctl.c b/tools/perf/bench/epoll-ctl.c index ddaca75c3bc0..1a17ec83d3c4 100644 --- a/tools/perf/bench/epoll-ctl.c +++ b/tools/perf/bench/epoll-ctl.c @@ -253,7 +253,7 @@ static int do_threads(struct worker *worker, struct perf_cpu_map *cpu) if (!noaffinity) { CPU_ZERO(&cpuset); - CPU_SET(cpu->map[i % cpu->nr], &cpuset); + CPU_SET(perf_cpu_map__cpu(cpu, i % perf_cpu_map__nr(cpu)).cpu, &cpuset); ret = pthread_attr_setaffinity_np(&thread_attr, sizeof(cpu_set_t), &cpuset); if (ret) diff --git a/tools/perf/bench/epoll-wait.c b/tools/perf/bench/epoll-wait.c index 79d13dbc0a47..0d1dd8879197 100644 --- a/tools/perf/bench/epoll-wait.c +++ b/tools/perf/bench/epoll-wait.c @@ -342,7 +342,7 @@ static int do_threads(struct worker *worker, struct perf_cpu_map *cpu) if (!noaffinity) { CPU_ZERO(&cpuset); - CPU_SET(cpu->map[i % cpu->nr], &cpuset); + CPU_SET(perf_cpu_map__cpu(cpu, i % perf_cpu_map__nr(cpu)).cpu, &cpuset); ret = pthread_attr_setaffinity_np(&thread_attr, sizeof(cpu_set_t), &cpuset); if (ret) diff --git a/tools/perf/bench/futex-hash.c b/tools/perf/bench/futex-hash.c index fcdea3e44937..9627b6ab8670 100644 --- a/tools/perf/bench/futex-hash.c +++ b/tools/perf/bench/futex-hash.c @@ -177,7 +177,7 @@ int bench_futex_hash(int argc, const char **argv) goto errmem; CPU_ZERO(&cpuset); - CPU_SET(cpu->map[i % cpu->nr], &cpuset); + CPU_SET(perf_cpu_map__cpu(cpu, i % perf_cpu_map__nr(cpu)).cpu, &cpuset); ret = pthread_attr_setaffinity_np(&thread_attr, sizeof(cpu_set_t), &cpuset); if (ret) diff --git a/tools/perf/bench/futex-lock-pi.c b/tools/perf/bench/futex-lock-pi.c index 137890f78e17..a512a320df74 100644 --- a/tools/perf/bench/futex-lock-pi.c +++ b/tools/perf/bench/futex-lock-pi.c @@ -136,7 +136,7 @@ static void create_threads(struct worker *w, pthread_attr_t thread_attr, worker[i].futex = &global_futex; CPU_ZERO(&cpuset); - CPU_SET(cpu->map[i % cpu->nr], &cpuset); + CPU_SET(perf_cpu_map__cpu(cpu, i % perf_cpu_map__nr(cpu)).cpu, &cpuset); if (pthread_attr_setaffinity_np(&thread_attr, sizeof(cpu_set_t), &cpuset)) err(EXIT_FAILURE, "pthread_attr_setaffinity_np"); diff --git a/tools/perf/bench/futex-requeue.c b/tools/perf/bench/futex-requeue.c index f7a5ffebb940..aca47ce8b1e7 100644 --- a/tools/perf/bench/futex-requeue.c +++ b/tools/perf/bench/futex-requeue.c @@ -131,7 +131,7 @@ static void block_threads(pthread_t *w, /* create and block all threads */ for (i = 0; i < params.nthreads; i++) { CPU_ZERO(&cpuset); - CPU_SET(cpu->map[i % cpu->nr], &cpuset); + CPU_SET(perf_cpu_map__cpu(cpu, i % perf_cpu_map__nr(cpu)).cpu, &cpuset); if (pthread_attr_setaffinity_np(&thread_attr, sizeof(cpu_set_t), &cpuset)) err(EXIT_FAILURE, "pthread_attr_setaffinity_np"); diff --git a/tools/perf/bench/futex-wake-parallel.c b/tools/perf/bench/futex-wake-parallel.c index 0983f40b4b40..888ee6037945 100644 --- a/tools/perf/bench/futex-wake-parallel.c +++ b/tools/perf/bench/futex-wake-parallel.c @@ -152,7 +152,7 @@ static void block_threads(pthread_t *w, pthread_attr_t thread_attr, /* create and block all threads */ for (i = 0; i < params.nthreads; i++) { CPU_ZERO(&cpuset); - CPU_SET(cpu->map[i % cpu->nr], &cpuset); + CPU_SET(perf_cpu_map__cpu(cpu, i % perf_cpu_map__nr(cpu)).cpu, &cpuset); if (pthread_attr_setaffinity_np(&thread_attr, sizeof(cpu_set_t), &cpuset)) err(EXIT_FAILURE, "pthread_attr_setaffinity_np"); diff --git a/tools/perf/bench/futex-wake.c b/tools/perf/bench/futex-wake.c index 2226a475e782..aa82db51c0ab 100644 --- a/tools/perf/bench/futex-wake.c +++ b/tools/perf/bench/futex-wake.c @@ -105,7 +105,7 @@ static void block_threads(pthread_t *w, /* create and block all threads */ for (i = 0; i < params.nthreads; i++) { CPU_ZERO(&cpuset); - CPU_SET(cpu->map[i % cpu->nr], &cpuset); + CPU_SET(perf_cpu_map__cpu(cpu, i % perf_cpu_map__nr(cpu)).cpu, &cpuset); if (pthread_attr_setaffinity_np(&thread_attr, sizeof(cpu_set_t), &cpuset)) err(EXIT_FAILURE, "pthread_attr_setaffinity_np"); diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c index ad1fbeafc93d..77dd4afacca4 100644 --- a/tools/perf/builtin-c2c.c +++ b/tools/perf/builtin-c2c.c @@ -2015,7 +2015,8 @@ static int setup_nodes(struct perf_session *session) { struct numa_node *n; unsigned long **nodes; - int node, cpu, idx; + int node, idx; + struct perf_cpu cpu; int *cpu2node; if (c2c.node_info > 2) @@ -2038,8 +2039,8 @@ static int setup_nodes(struct perf_session *session) if (!cpu2node) return -ENOMEM; - for (cpu = 0; cpu < c2c.cpus_cnt; cpu++) - cpu2node[cpu] = -1; + for (idx = 0; idx < c2c.cpus_cnt; idx++) + cpu2node[idx] = -1; c2c.cpu2node = cpu2node; @@ -2058,12 +2059,12 @@ static int setup_nodes(struct perf_session *session) continue; perf_cpu_map__for_each_cpu(cpu, idx, map) { - set_bit(cpu, set); + set_bit(cpu.cpu, set); - if (WARN_ONCE(cpu2node[cpu] != -1, "node/cpu topology bug")) + if (WARN_ONCE(cpu2node[cpu.cpu] != -1, "node/cpu topology bug")) return -EINVAL; - cpu2node[cpu] = node; + cpu2node[cpu.cpu] = node; } } diff --git a/tools/perf/builtin-ftrace.c b/tools/perf/builtin-ftrace.c index f16c39a37a52..71452599f87d 100644 --- a/tools/perf/builtin-ftrace.c +++ b/tools/perf/builtin-ftrace.c @@ -281,7 +281,7 @@ static int set_tracing_cpumask(struct perf_cpu_map *cpumap) int ret; int last_cpu; - last_cpu = perf_cpu_map__cpu(cpumap, cpumap->nr - 1); + last_cpu = perf_cpu_map__cpu(cpumap, cpumap->nr - 1).cpu; mask_size = last_cpu / 4 + 2; /* one more byte for EOS */ mask_size += last_cpu / 32; /* ',' is needed for every 32th cpus */ diff --git a/tools/perf/builtin-kmem.c b/tools/perf/builtin-kmem.c index da03a341c63c..99d7ff9a8eff 100644 --- a/tools/perf/builtin-kmem.c +++ b/tools/perf/builtin-kmem.c @@ -192,7 +192,7 @@ static int evsel__process_alloc_node_event(struct evsel *evsel, struct perf_samp int ret = evsel__process_alloc_event(evsel, sample); if (!ret) { - int node1 = cpu__get_node(sample->cpu), + int node1 = cpu__get_node((struct perf_cpu){.cpu = sample->cpu}), node2 = evsel__intval(evsel, sample, "node"); if (node1 != node2) diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 6ac2160913ea..0a63295d30f0 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -2796,7 +2796,7 @@ int cmd_record(int argc, const char **argv) symbol__init(NULL); if (rec->opts.affinity != PERF_AFFINITY_SYS) { - rec->affinity_mask.nbits = cpu__max_cpu(); + rec->affinity_mask.nbits = cpu__max_cpu().cpu; rec->affinity_mask.bits = bitmap_zalloc(rec->affinity_mask.nbits); if (!rec->affinity_mask.bits) { pr_err("Failed to allocate thread mask for %zd cpus\n", rec->affinity_mask.nbits); diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c index 9da1da4749c9..72d446de9c60 100644 --- a/tools/perf/builtin-sched.c +++ b/tools/perf/builtin-sched.c @@ -167,7 +167,7 @@ struct trace_sched_handler { struct perf_sched_map { DECLARE_BITMAP(comp_cpus_mask, MAX_CPUS); - int *comp_cpus; + struct perf_cpu *comp_cpus; bool comp; struct perf_thread_map *color_pids; const char *color_pids_str; @@ -191,7 +191,7 @@ struct perf_sched { * Track the current task - that way we can know whether there's any * weird events, such as a task being switched away that is not current. */ - int max_cpu; + struct perf_cpu max_cpu; u32 curr_pid[MAX_CPUS]; struct thread *curr_thread[MAX_CPUS]; char next_shortname1; @@ -1535,28 +1535,31 @@ static int map_switch_event(struct perf_sched *sched, struct evsel *evsel, int new_shortname; u64 timestamp0, timestamp = sample->time; s64 delta; - int i, this_cpu = sample->cpu; + int i; + struct perf_cpu this_cpu = { + .cpu = sample->cpu, + }; int cpus_nr; bool new_cpu = false; const char *color = PERF_COLOR_NORMAL; char stimestamp[32]; - BUG_ON(this_cpu >= MAX_CPUS || this_cpu < 0); + BUG_ON(this_cpu.cpu >= MAX_CPUS || this_cpu.cpu < 0); - if (this_cpu > sched->max_cpu) + if (this_cpu.cpu > sched->max_cpu.cpu) sched->max_cpu = this_cpu; if (sched->map.comp) { cpus_nr = bitmap_weight(sched->map.comp_cpus_mask, MAX_CPUS); - if (!test_and_set_bit(this_cpu, sched->map.comp_cpus_mask)) { + if (!test_and_set_bit(this_cpu.cpu, sched->map.comp_cpus_mask)) { sched->map.comp_cpus[cpus_nr++] = this_cpu; new_cpu = true; } } else - cpus_nr = sched->max_cpu; + cpus_nr = sched->max_cpu.cpu; - timestamp0 = sched->cpu_last_switched[this_cpu]; - sched->cpu_last_switched[this_cpu] = timestamp; + timestamp0 = sched->cpu_last_switched[this_cpu.cpu]; + sched->cpu_last_switched[this_cpu.cpu] = timestamp; if (timestamp0) delta = timestamp - timestamp0; else @@ -1577,7 +1580,7 @@ static int map_switch_event(struct perf_sched *sched, struct evsel *evsel, return -1; } - sched->curr_thread[this_cpu] = thread__get(sched_in); + sched->curr_thread[this_cpu.cpu] = thread__get(sched_in); printf(" "); @@ -1608,8 +1611,10 @@ static int map_switch_event(struct perf_sched *sched, struct evsel *evsel, } for (i = 0; i < cpus_nr; i++) { - int cpu = sched->map.comp ? sched->map.comp_cpus[i] : i; - struct thread *curr_thread = sched->curr_thread[cpu]; + struct perf_cpu cpu = { + .cpu = sched->map.comp ? sched->map.comp_cpus[i].cpu : i, + }; + struct thread *curr_thread = sched->curr_thread[cpu.cpu]; struct thread_runtime *curr_tr; const char *pid_color = color; const char *cpu_color = color; @@ -1623,13 +1628,13 @@ static int map_switch_event(struct perf_sched *sched, struct evsel *evsel, if (sched->map.color_cpus && perf_cpu_map__has(sched->map.color_cpus, cpu)) cpu_color = COLOR_CPUS; - if (cpu != this_cpu) + if (cpu.cpu != this_cpu.cpu) color_fprintf(stdout, color, " "); else color_fprintf(stdout, cpu_color, "*"); - if (sched->curr_thread[cpu]) { - curr_tr = thread__get_runtime(sched->curr_thread[cpu]); + if (sched->curr_thread[cpu.cpu]) { + curr_tr = thread__get_runtime(sched->curr_thread[cpu.cpu]); if (curr_tr == NULL) { thread__put(sched_in); return -1; @@ -1929,7 +1934,7 @@ static char *timehist_get_commstr(struct thread *thread) static void timehist_header(struct perf_sched *sched) { - u32 ncpus = sched->max_cpu + 1; + u32 ncpus = sched->max_cpu.cpu + 1; u32 i, j; printf("%15s %6s ", "time", "cpu"); @@ -2008,7 +2013,7 @@ static void timehist_print_sample(struct perf_sched *sched, struct thread_runtime *tr = thread__priv(thread); const char *next_comm = evsel__strval(evsel, sample, "next_comm"); const u32 next_pid = evsel__intval(evsel, sample, "next_pid"); - u32 max_cpus = sched->max_cpu + 1; + u32 max_cpus = sched->max_cpu.cpu + 1; char tstr[64]; char nstr[30]; u64 wait_time; @@ -2389,7 +2394,7 @@ static void timehist_print_wakeup_event(struct perf_sched *sched, timestamp__scnprintf_usec(sample->time, tstr, sizeof(tstr)); printf("%15s [%04d] ", tstr, sample->cpu); if (sched->show_cpu_visual) - printf(" %*s ", sched->max_cpu + 1, ""); + printf(" %*s ", sched->max_cpu.cpu + 1, ""); printf(" %-*s ", comm_width, timehist_get_commstr(thread)); @@ -2449,13 +2454,13 @@ static void timehist_print_migration_event(struct perf_sched *sched, { struct thread *thread; char tstr[64]; - u32 max_cpus = sched->max_cpu + 1; + u32 max_cpus; u32 ocpu, dcpu; if (sched->summary_only) return; - max_cpus = sched->max_cpu + 1; + max_cpus = sched->max_cpu.cpu + 1; ocpu = evsel__intval(evsel, sample, "orig_cpu"); dcpu = evsel__intval(evsel, sample, "dest_cpu"); @@ -2918,7 +2923,7 @@ static void timehist_print_summary(struct perf_sched *sched, printf(" Total scheduling time (msec): "); print_sched_time(hist_time, 2); - printf(" (x %d)\n", sched->max_cpu); + printf(" (x %d)\n", sched->max_cpu.cpu); } typedef int (*sched_handler)(struct perf_tool *tool, @@ -2935,9 +2940,11 @@ static int perf_timehist__process_sample(struct perf_tool *tool, { struct perf_sched *sched = container_of(tool, struct perf_sched, tool); int err = 0; - int this_cpu = sample->cpu; + struct perf_cpu this_cpu = { + .cpu = sample->cpu, + }; - if (this_cpu > sched->max_cpu) + if (this_cpu.cpu > sched->max_cpu.cpu) sched->max_cpu = this_cpu; if (evsel->handler != NULL) { @@ -3054,10 +3061,10 @@ static int perf_sched__timehist(struct perf_sched *sched) goto out; /* pre-allocate struct for per-CPU idle stats */ - sched->max_cpu = session->header.env.nr_cpus_online; - if (sched->max_cpu == 0) - sched->max_cpu = 4; - if (init_idle_threads(sched->max_cpu)) + sched->max_cpu.cpu = session->header.env.nr_cpus_online; + if (sched->max_cpu.cpu == 0) + sched->max_cpu.cpu = 4; + if (init_idle_threads(sched->max_cpu.cpu)) goto out; /* summary_only implies summary option, but don't overwrite summary if set */ @@ -3209,10 +3216,10 @@ static int setup_map_cpus(struct perf_sched *sched) { struct perf_cpu_map *map; - sched->max_cpu = sysconf(_SC_NPROCESSORS_CONF); + sched->max_cpu.cpu = sysconf(_SC_NPROCESSORS_CONF); if (sched->map.comp) { - sched->map.comp_cpus = zalloc(sched->max_cpu * sizeof(int)); + sched->map.comp_cpus = zalloc(sched->max_cpu.cpu * sizeof(int)); if (!sched->map.comp_cpus) return -1; } diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index bb43529618b3..ecd4f99a6c14 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -2115,7 +2115,8 @@ static struct scripting_ops *scripting_ops; static void __process_stat(struct evsel *counter, u64 tstamp) { int nthreads = perf_thread_map__nr(counter->core.threads); - int idx, cpu, thread; + int idx, thread; + struct perf_cpu cpu; static int header_printed; if (counter->core.system_wide) @@ -2134,7 +2135,7 @@ static void __process_stat(struct evsel *counter, u64 tstamp) counts = perf_counts(counter->counts, idx, thread); printf("%3d %8d %15" PRIu64 " %15" PRIu64 " %15" PRIu64 " %15" PRIu64 " %s\n", - cpu, + cpu.cpu, perf_thread_map__pid(counter->core.threads, thread), counts->val, counts->ena, diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index dfb8f7847e6c..973ade18b72a 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -234,7 +234,7 @@ static bool cpus_map_matched(struct evsel *a, struct evsel *b) return false; for (int i = 0; i < a->core.cpus->nr; i++) { - if (a->core.cpus->map[i] != b->core.cpus->map[i]) + if (a->core.cpus->map[i].cpu != b->core.cpus->map[i].cpu) return false; } @@ -331,7 +331,7 @@ static int evsel__write_stat_event(struct evsel *counter, int cpu_map_idx, u32 t struct perf_counts_values *count) { struct perf_sample_id *sid = SID(counter, cpu_map_idx, thread); - int cpu = perf_cpu_map__cpu(evsel__cpus(counter), cpu_map_idx); + struct perf_cpu cpu = perf_cpu_map__cpu(evsel__cpus(counter), cpu_map_idx); return perf_event__synthesize_stat(NULL, cpu, thread, sid->id, count, process_synthesized_event, NULL); @@ -396,7 +396,8 @@ static int read_counter_cpu(struct evsel *counter, struct timespec *rs, int cpu_ fprintf(stat_config.output, "%s: %d: %" PRIu64 " %" PRIu64 " %" PRIu64 "\n", evsel__name(counter), - perf_cpu_map__cpu(evsel__cpus(counter), cpu_map_idx), + perf_cpu_map__cpu(evsel__cpus(counter), + cpu_map_idx).cpu, count->val, count->ena, count->run); } } @@ -1328,61 +1329,61 @@ static const char *const aggr_mode__string[] = { }; static struct aggr_cpu_id perf_stat__get_socket(struct perf_stat_config *config __maybe_unused, - int cpu) + struct perf_cpu cpu) { return aggr_cpu_id__socket(cpu, /*data=*/NULL); } static struct aggr_cpu_id perf_stat__get_die(struct perf_stat_config *config __maybe_unused, - int cpu) + struct perf_cpu cpu) { return aggr_cpu_id__die(cpu, /*data=*/NULL); } static struct aggr_cpu_id perf_stat__get_core(struct perf_stat_config *config __maybe_unused, - int cpu) + struct perf_cpu cpu) { return aggr_cpu_id__core(cpu, /*data=*/NULL); } static struct aggr_cpu_id perf_stat__get_node(struct perf_stat_config *config __maybe_unused, - int cpu) + struct perf_cpu cpu) { return aggr_cpu_id__node(cpu, /*data=*/NULL); } static struct aggr_cpu_id perf_stat__get_aggr(struct perf_stat_config *config, - aggr_get_id_t get_id, int cpu) + aggr_get_id_t get_id, struct perf_cpu cpu) { struct aggr_cpu_id id = aggr_cpu_id__empty(); - if (aggr_cpu_id__is_empty(&config->cpus_aggr_map->map[cpu])) - config->cpus_aggr_map->map[cpu] = get_id(config, cpu); + if (aggr_cpu_id__is_empty(&config->cpus_aggr_map->map[cpu.cpu])) + config->cpus_aggr_map->map[cpu.cpu] = get_id(config, cpu); - id = config->cpus_aggr_map->map[cpu]; + id = config->cpus_aggr_map->map[cpu.cpu]; return id; } static struct aggr_cpu_id perf_stat__get_socket_cached(struct perf_stat_config *config, - int cpu) + struct perf_cpu cpu) { return perf_stat__get_aggr(config, perf_stat__get_socket, cpu); } static struct aggr_cpu_id perf_stat__get_die_cached(struct perf_stat_config *config, - int cpu) + struct perf_cpu cpu) { return perf_stat__get_aggr(config, perf_stat__get_die, cpu); } static struct aggr_cpu_id perf_stat__get_core_cached(struct perf_stat_config *config, - int cpu) + struct perf_cpu cpu) { return perf_stat__get_aggr(config, perf_stat__get_core, cpu); } static struct aggr_cpu_id perf_stat__get_node_cached(struct perf_stat_config *config, - int cpu) + struct perf_cpu cpu) { return perf_stat__get_aggr(config, perf_stat__get_node, cpu); } @@ -1467,7 +1468,7 @@ static int perf_stat_init_aggr_mode(void) * taking the highest cpu number to be the size of * the aggregation translate cpumap. */ - nr = perf_cpu_map__max(evsel_list->core.cpus); + nr = perf_cpu_map__max(evsel_list->core.cpus).cpu; stat_config.cpus_aggr_map = cpu_aggr_map__empty_new(nr + 1); return stat_config.cpus_aggr_map ? 0 : -ENOMEM; } @@ -1495,55 +1496,55 @@ static void perf_stat__exit_aggr_mode(void) stat_config.cpus_aggr_map = NULL; } -static struct aggr_cpu_id perf_env__get_socket_aggr_by_cpu(int cpu, void *data) +static struct aggr_cpu_id perf_env__get_socket_aggr_by_cpu(struct perf_cpu cpu, void *data) { struct perf_env *env = data; struct aggr_cpu_id id = aggr_cpu_id__empty(); - if (cpu != -1) - id.socket = env->cpu[cpu].socket_id; + if (cpu.cpu != -1) + id.socket = env->cpu[cpu.cpu].socket_id; return id; } -static struct aggr_cpu_id perf_env__get_die_aggr_by_cpu(int cpu, void *data) +static struct aggr_cpu_id perf_env__get_die_aggr_by_cpu(struct perf_cpu cpu, void *data) { struct perf_env *env = data; struct aggr_cpu_id id = aggr_cpu_id__empty(); - if (cpu != -1) { + if (cpu.cpu != -1) { /* * die_id is relative to socket, so start * with the socket ID and then add die to * make a unique ID. */ - id.socket = env->cpu[cpu].socket_id; - id.die = env->cpu[cpu].die_id; + id.socket = env->cpu[cpu.cpu].socket_id; + id.die = env->cpu[cpu.cpu].die_id; } return id; } -static struct aggr_cpu_id perf_env__get_core_aggr_by_cpu(int cpu, void *data) +static struct aggr_cpu_id perf_env__get_core_aggr_by_cpu(struct perf_cpu cpu, void *data) { struct perf_env *env = data; struct aggr_cpu_id id = aggr_cpu_id__empty(); - if (cpu != -1) { + if (cpu.cpu != -1) { /* * core_id is relative to socket and die, * we need a global id. So we set * socket, die id and core id */ - id.socket = env->cpu[cpu].socket_id; - id.die = env->cpu[cpu].die_id; - id.core = env->cpu[cpu].core_id; + id.socket = env->cpu[cpu.cpu].socket_id; + id.die = env->cpu[cpu.cpu].die_id; + id.core = env->cpu[cpu.cpu].core_id; } return id; } -static struct aggr_cpu_id perf_env__get_node_aggr_by_cpu(int cpu, void *data) +static struct aggr_cpu_id perf_env__get_node_aggr_by_cpu(struct perf_cpu cpu, void *data) { struct aggr_cpu_id id = aggr_cpu_id__empty(); @@ -1552,24 +1553,24 @@ static struct aggr_cpu_id perf_env__get_node_aggr_by_cpu(int cpu, void *data) } static struct aggr_cpu_id perf_stat__get_socket_file(struct perf_stat_config *config __maybe_unused, - int cpu) + struct perf_cpu cpu) { return perf_env__get_socket_aggr_by_cpu(cpu, &perf_stat.session->header.env); } static struct aggr_cpu_id perf_stat__get_die_file(struct perf_stat_config *config __maybe_unused, - int cpu) + struct perf_cpu cpu) { return perf_env__get_die_aggr_by_cpu(cpu, &perf_stat.session->header.env); } static struct aggr_cpu_id perf_stat__get_core_file(struct perf_stat_config *config __maybe_unused, - int cpu) + struct perf_cpu cpu) { return perf_env__get_core_aggr_by_cpu(cpu, &perf_stat.session->header.env); } static struct aggr_cpu_id perf_stat__get_node_file(struct perf_stat_config *config __maybe_unused, - int cpu) + struct perf_cpu cpu) { return perf_env__get_node_aggr_by_cpu(cpu, &perf_stat.session->header.env); } diff --git a/tools/perf/tests/attr.c b/tools/perf/tests/attr.c index 0f73e300f207..56fba08a3037 100644 --- a/tools/perf/tests/attr.c +++ b/tools/perf/tests/attr.c @@ -65,7 +65,7 @@ do { \ #define WRITE_ASS(field, fmt) __WRITE_ASS(field, fmt, attr->field) -static int store_event(struct perf_event_attr *attr, pid_t pid, int cpu, +static int store_event(struct perf_event_attr *attr, pid_t pid, struct perf_cpu cpu, int fd, int group_fd, unsigned long flags) { FILE *file; @@ -93,7 +93,7 @@ static int store_event(struct perf_event_attr *attr, pid_t pid, int cpu, /* syscall arguments */ __WRITE_ASS(fd, "d", fd); __WRITE_ASS(group_fd, "d", group_fd); - __WRITE_ASS(cpu, "d", cpu); + __WRITE_ASS(cpu, "d", cpu.cpu); __WRITE_ASS(pid, "d", pid); __WRITE_ASS(flags, "lu", flags); @@ -144,7 +144,7 @@ static int store_event(struct perf_event_attr *attr, pid_t pid, int cpu, return 0; } -void test_attr__open(struct perf_event_attr *attr, pid_t pid, int cpu, +void test_attr__open(struct perf_event_attr *attr, pid_t pid, struct perf_cpu cpu, int fd, int group_fd, unsigned long flags) { int errno_saved = errno; diff --git a/tools/perf/tests/bitmap.c b/tools/perf/tests/bitmap.c index 384856347236..0bf399c49849 100644 --- a/tools/perf/tests/bitmap.c +++ b/tools/perf/tests/bitmap.c @@ -18,7 +18,7 @@ static unsigned long *get_bitmap(const char *str, int nbits) if (map && bm) { for (i = 0; i < map->nr; i++) - set_bit(map->map[i], bm); + set_bit(map->map[i].cpu, bm); } if (map) diff --git a/tools/perf/tests/cpumap.c b/tools/perf/tests/cpumap.c index 89a155092f85..84e87e31f119 100644 --- a/tools/perf/tests/cpumap.c +++ b/tools/perf/tests/cpumap.c @@ -38,7 +38,7 @@ static int process_event_mask(struct perf_tool *tool __maybe_unused, TEST_ASSERT_VAL("wrong nr", map->nr == 20); for (i = 0; i < 20; i++) { - TEST_ASSERT_VAL("wrong cpu", map->map[i] == i); + TEST_ASSERT_VAL("wrong cpu", map->map[i].cpu == i); } perf_cpu_map__put(map); @@ -67,8 +67,8 @@ static int process_event_cpus(struct perf_tool *tool __maybe_unused, map = cpu_map__new_data(data); TEST_ASSERT_VAL("wrong nr", map->nr == 2); - TEST_ASSERT_VAL("wrong cpu", map->map[0] == 1); - TEST_ASSERT_VAL("wrong cpu", map->map[1] == 256); + TEST_ASSERT_VAL("wrong cpu", map->map[0].cpu == 1); + TEST_ASSERT_VAL("wrong cpu", map->map[1].cpu == 256); TEST_ASSERT_VAL("wrong refcnt", refcount_read(&map->refcnt) == 1); perf_cpu_map__put(map); return 0; diff --git a/tools/perf/tests/event_update.c b/tools/perf/tests/event_update.c index d01532d40acb..16b6d6f47f38 100644 --- a/tools/perf/tests/event_update.c +++ b/tools/perf/tests/event_update.c @@ -76,9 +76,9 @@ static int process_event_cpus(struct perf_tool *tool __maybe_unused, TEST_ASSERT_VAL("wrong id", ev->id == 123); TEST_ASSERT_VAL("wrong type", ev->type == PERF_EVENT_UPDATE__CPUS); TEST_ASSERT_VAL("wrong cpus", map->nr == 3); - TEST_ASSERT_VAL("wrong cpus", map->map[0] == 1); - TEST_ASSERT_VAL("wrong cpus", map->map[1] == 2); - TEST_ASSERT_VAL("wrong cpus", map->map[2] == 3); + TEST_ASSERT_VAL("wrong cpus", map->map[0].cpu == 1); + TEST_ASSERT_VAL("wrong cpus", map->map[1].cpu == 2); + TEST_ASSERT_VAL("wrong cpus", map->map[2].cpu == 3); perf_cpu_map__put(map); return 0; } diff --git a/tools/perf/tests/mem2node.c b/tools/perf/tests/mem2node.c index b17b86391383..f4a4aba33f76 100644 --- a/tools/perf/tests/mem2node.c +++ b/tools/perf/tests/mem2node.c @@ -31,7 +31,7 @@ static unsigned long *get_bitmap(const char *str, int nbits) if (map && bm) { for (i = 0; i < map->nr; i++) { - set_bit(map->map[i], bm); + set_bit(map->map[i].cpu, bm); } } diff --git a/tools/perf/tests/mmap-basic.c b/tools/perf/tests/mmap-basic.c index 90b2feda31ac..0ad62914b4d7 100644 --- a/tools/perf/tests/mmap-basic.c +++ b/tools/perf/tests/mmap-basic.c @@ -59,11 +59,11 @@ static int test__basic_mmap(struct test_suite *test __maybe_unused, int subtest } CPU_ZERO(&cpu_set); - CPU_SET(cpus->map[0], &cpu_set); + CPU_SET(cpus->map[0].cpu, &cpu_set); sched_setaffinity(0, sizeof(cpu_set), &cpu_set); if (sched_setaffinity(0, sizeof(cpu_set), &cpu_set) < 0) { pr_debug("sched_setaffinity() failed on CPU %d: %s ", - cpus->map[0], str_error_r(errno, sbuf, sizeof(sbuf))); + cpus->map[0].cpu, str_error_r(errno, sbuf, sizeof(sbuf))); goto out_free_cpus; } diff --git a/tools/perf/tests/openat-syscall-all-cpus.c b/tools/perf/tests/openat-syscall-all-cpus.c index ca0a50e92839..1ab362323d25 100644 --- a/tools/perf/tests/openat-syscall-all-cpus.c +++ b/tools/perf/tests/openat-syscall-all-cpus.c @@ -22,7 +22,8 @@ static int test__openat_syscall_event_on_all_cpus(struct test_suite *test __maybe_unused, int subtest __maybe_unused) { - int err = -1, fd, idx, cpu; + int err = -1, fd, idx; + struct perf_cpu cpu; struct perf_cpu_map *cpus; struct evsel *evsel; unsigned int nr_openat_calls = 111, i; @@ -66,15 +67,15 @@ static int test__openat_syscall_event_on_all_cpus(struct test_suite *test __mayb * without CPU_ALLOC. 1024 cpus in 2010 still seems * a reasonable upper limit tho :-) */ - if (cpu >= CPU_SETSIZE) { - pr_debug("Ignoring CPU %d\n", cpu); + if (cpu.cpu >= CPU_SETSIZE) { + pr_debug("Ignoring CPU %d\n", cpu.cpu); continue; } - CPU_SET(cpu, &cpu_set); + CPU_SET(cpu.cpu, &cpu_set); if (sched_setaffinity(0, sizeof(cpu_set), &cpu_set) < 0) { pr_debug("sched_setaffinity() failed on CPU %d: %s ", - cpu, + cpu.cpu, str_error_r(errno, sbuf, sizeof(sbuf))); goto out_close_fd; } @@ -82,7 +83,7 @@ static int test__openat_syscall_event_on_all_cpus(struct test_suite *test __mayb fd = openat(0, "/etc/passwd", O_RDONLY); close(fd); } - CPU_CLR(cpu, &cpu_set); + CPU_CLR(cpu.cpu, &cpu_set); } evsel->core.cpus = perf_cpu_map__get(cpus); @@ -92,7 +93,7 @@ static int test__openat_syscall_event_on_all_cpus(struct test_suite *test __mayb perf_cpu_map__for_each_cpu(cpu, idx, cpus) { unsigned int expected; - if (cpu >= CPU_SETSIZE) + if (cpu.cpu >= CPU_SETSIZE) continue; if (evsel__read_on_cpu(evsel, idx, 0) < 0) { @@ -104,7 +105,7 @@ static int test__openat_syscall_event_on_all_cpus(struct test_suite *test __mayb expected = nr_openat_calls + idx; if (perf_counts(evsel->counts, idx, 0)->val != expected) { pr_debug("evsel__read_on_cpu: expected to intercept %d calls on cpu %d, got %" PRIu64 "\n", - expected, cpu, perf_counts(evsel->counts, idx, 0)->val); + expected, cpu.cpu, perf_counts(evsel->counts, idx, 0)->val); err = -1; } } diff --git a/tools/perf/tests/stat.c b/tools/perf/tests/stat.c index 2eb096b5e6da..500974040fe3 100644 --- a/tools/perf/tests/stat.c +++ b/tools/perf/tests/stat.c @@ -87,7 +87,8 @@ static int test__synthesize_stat(struct test_suite *test __maybe_unused, int sub count.run = 300; TEST_ASSERT_VAL("failed to synthesize stat_config", - !perf_event__synthesize_stat(NULL, 1, 2, 3, &count, process_stat_event, NULL)); + !perf_event__synthesize_stat(NULL, (struct perf_cpu){.cpu = 1}, 2, 3, + &count, process_stat_event, NULL)); return 0; } diff --git a/tools/perf/tests/topology.c b/tools/perf/tests/topology.c index 33e4cb81265c..c4ef0c7002f1 100644 --- a/tools/perf/tests/topology.c +++ b/tools/perf/tests/topology.c @@ -112,7 +112,9 @@ static int check_cpu_topology(char *path, struct perf_cpu_map *map) TEST_ASSERT_VAL("Session header CPU map not set", session->header.env.cpu); for (i = 0; i < session->header.env.nr_cpus_avail; i++) { - if (!perf_cpu_map__has(map, i)) + struct perf_cpu cpu = { .cpu = i }; + + if (!perf_cpu_map__has(map, cpu)) continue; pr_debug("CPU %d, core %d, socket %d\n", i, session->header.env.cpu[i].core_id, @@ -122,15 +124,15 @@ static int check_cpu_topology(char *path, struct perf_cpu_map *map) // Test that CPU ID contains socket, die, core and CPU for (i = 0; i < map->nr; i++) { id = aggr_cpu_id__cpu(perf_cpu_map__cpu(map, i), NULL); - TEST_ASSERT_VAL("Cpu map - CPU ID doesn't match", map->map[i] == id.cpu); + TEST_ASSERT_VAL("Cpu map - CPU ID doesn't match", map->map[i].cpu == id.cpu.cpu); TEST_ASSERT_VAL("Cpu map - Core ID doesn't match", - session->header.env.cpu[map->map[i]].core_id == id.core); + session->header.env.cpu[map->map[i].cpu].core_id == id.core); TEST_ASSERT_VAL("Cpu map - Socket ID doesn't match", - session->header.env.cpu[map->map[i]].socket_id == id.socket); + session->header.env.cpu[map->map[i].cpu].socket_id == id.socket); TEST_ASSERT_VAL("Cpu map - Die ID doesn't match", - session->header.env.cpu[map->map[i]].die_id == id.die); + session->header.env.cpu[map->map[i].cpu].die_id == id.die); TEST_ASSERT_VAL("Cpu map - Node ID is set", id.node == -1); TEST_ASSERT_VAL("Cpu map - Thread is set", id.thread == -1); } @@ -139,13 +141,13 @@ static int check_cpu_topology(char *path, struct perf_cpu_map *map) for (i = 0; i < map->nr; i++) { id = aggr_cpu_id__core(perf_cpu_map__cpu(map, i), NULL); TEST_ASSERT_VAL("Core map - Core ID doesn't match", - session->header.env.cpu[map->map[i]].core_id == id.core); + session->header.env.cpu[map->map[i].cpu].core_id == id.core); TEST_ASSERT_VAL("Core map - Socket ID doesn't match", - session->header.env.cpu[map->map[i]].socket_id == id.socket); + session->header.env.cpu[map->map[i].cpu].socket_id == id.socket); TEST_ASSERT_VAL("Core map - Die ID doesn't match", - session->header.env.cpu[map->map[i]].die_id == id.die); + session->header.env.cpu[map->map[i].cpu].die_id == id.die); TEST_ASSERT_VAL("Core map - Node ID is set", id.node == -1); TEST_ASSERT_VAL("Core map - Thread is set", id.thread == -1); } @@ -154,14 +156,14 @@ static int check_cpu_topology(char *path, struct perf_cpu_map *map) for (i = 0; i < map->nr; i++) { id = aggr_cpu_id__die(perf_cpu_map__cpu(map, i), NULL); TEST_ASSERT_VAL("Die map - Socket ID doesn't match", - session->header.env.cpu[map->map[i]].socket_id == id.socket); + session->header.env.cpu[map->map[i].cpu].socket_id == id.socket); TEST_ASSERT_VAL("Die map - Die ID doesn't match", - session->header.env.cpu[map->map[i]].die_id == id.die); + session->header.env.cpu[map->map[i].cpu].die_id == id.die); TEST_ASSERT_VAL("Die map - Node ID is set", id.node == -1); TEST_ASSERT_VAL("Die map - Core is set", id.core == -1); - TEST_ASSERT_VAL("Die map - CPU is set", id.cpu == -1); + TEST_ASSERT_VAL("Die map - CPU is set", id.cpu.cpu == -1); TEST_ASSERT_VAL("Die map - Thread is set", id.thread == -1); } @@ -169,12 +171,12 @@ static int check_cpu_topology(char *path, struct perf_cpu_map *map) for (i = 0; i < map->nr; i++) { id = aggr_cpu_id__socket(perf_cpu_map__cpu(map, i), NULL); TEST_ASSERT_VAL("Socket map - Socket ID doesn't match", - session->header.env.cpu[map->map[i]].socket_id == id.socket); + session->header.env.cpu[map->map[i].cpu].socket_id == id.socket); TEST_ASSERT_VAL("Socket map - Node ID is set", id.node == -1); TEST_ASSERT_VAL("Socket map - Die ID is set", id.die == -1); TEST_ASSERT_VAL("Socket map - Core is set", id.core == -1); - TEST_ASSERT_VAL("Socket map - CPU is set", id.cpu == -1); + TEST_ASSERT_VAL("Socket map - CPU is set", id.cpu.cpu == -1); TEST_ASSERT_VAL("Socket map - Thread is set", id.thread == -1); } @@ -186,7 +188,7 @@ static int check_cpu_topology(char *path, struct perf_cpu_map *map) TEST_ASSERT_VAL("Node map - Socket is set", id.socket == -1); TEST_ASSERT_VAL("Node map - Die ID is set", id.die == -1); TEST_ASSERT_VAL("Node map - Core is set", id.core == -1); - TEST_ASSERT_VAL("Node map - CPU is set", id.cpu == -1); + TEST_ASSERT_VAL("Node map - CPU is set", id.cpu.cpu == -1); TEST_ASSERT_VAL("Node map - Thread is set", id.thread == -1); } perf_session__delete(session); diff --git a/tools/perf/util/affinity.c b/tools/perf/util/affinity.c index 7b12bd7a3080..f1e30d566db3 100644 --- a/tools/perf/util/affinity.c +++ b/tools/perf/util/affinity.c @@ -11,7 +11,7 @@ static int get_cpu_set_size(void) { - int sz = cpu__max_cpu() + 8 - 1; + int sz = cpu__max_cpu().cpu + 8 - 1; /* * sched_getaffinity doesn't like masks smaller than the kernel. * Hopefully that's big enough. diff --git a/tools/perf/util/auxtrace.c b/tools/perf/util/auxtrace.c index c679394b898d..5632efc44738 100644 --- a/tools/perf/util/auxtrace.c +++ b/tools/perf/util/auxtrace.c @@ -123,7 +123,7 @@ int auxtrace_mmap__mmap(struct auxtrace_mmap *mm, mm->prev = 0; mm->idx = mp->idx; mm->tid = mp->tid; - mm->cpu = mp->cpu; + mm->cpu = mp->cpu.cpu; if (!mp->len) { mm->base = NULL; @@ -180,7 +180,7 @@ void auxtrace_mmap_params__set_idx(struct auxtrace_mmap_params *mp, else mp->tid = -1; } else { - mp->cpu = -1; + mp->cpu.cpu = -1; mp->tid = perf_thread_map__pid(evlist->core.threads, idx); } } @@ -292,7 +292,7 @@ static int auxtrace_queues__queue_buffer(struct auxtrace_queues *queues, if (!queue->set) { queue->set = true; queue->tid = buffer->tid; - queue->cpu = buffer->cpu; + queue->cpu = buffer->cpu.cpu; } buffer->buffer_nr = queues->next_buffer_nr++; @@ -339,11 +339,11 @@ static int auxtrace_queues__split_buffer(struct auxtrace_queues *queues, return 0; } -static bool filter_cpu(struct perf_session *session, int cpu) +static bool filter_cpu(struct perf_session *session, struct perf_cpu cpu) { unsigned long *cpu_bitmap = session->itrace_synth_opts->cpu_bitmap; - return cpu_bitmap && cpu != -1 && !test_bit(cpu, cpu_bitmap); + return cpu_bitmap && cpu.cpu != -1 && !test_bit(cpu.cpu, cpu_bitmap); } static int auxtrace_queues__add_buffer(struct auxtrace_queues *queues, @@ -399,7 +399,7 @@ int auxtrace_queues__add_event(struct auxtrace_queues *queues, struct auxtrace_buffer buffer = { .pid = -1, .tid = event->auxtrace.tid, - .cpu = event->auxtrace.cpu, + .cpu = { event->auxtrace.cpu }, .data_offset = data_offset, .offset = event->auxtrace.offset, .reference = event->auxtrace.reference, diff --git a/tools/perf/util/auxtrace.h b/tools/perf/util/auxtrace.h index bbf0d78c6401..19910b9011f3 100644 --- a/tools/perf/util/auxtrace.h +++ b/tools/perf/util/auxtrace.h @@ -15,6 +15,7 @@ #include #include #include +#include #include #include @@ -240,7 +241,7 @@ struct auxtrace_buffer { size_t size; pid_t pid; pid_t tid; - int cpu; + struct perf_cpu cpu; void *data; off_t data_offset; void *mmap_addr; @@ -350,7 +351,7 @@ struct auxtrace_mmap_params { int prot; int idx; pid_t tid; - int cpu; + struct perf_cpu cpu; }; /** diff --git a/tools/perf/util/bpf_counter.c b/tools/perf/util/bpf_counter.c index 80d1a3a31052..328479df5e16 100644 --- a/tools/perf/util/bpf_counter.c +++ b/tools/perf/util/bpf_counter.c @@ -540,7 +540,7 @@ static int bperf__load(struct evsel *evsel, struct target *target) filter_type == BPERF_FILTER_TGID) key = evsel->core.threads->map[i].pid; else if (filter_type == BPERF_FILTER_CPU) - key = evsel->core.cpus->map[i]; + key = evsel->core.cpus->map[i].cpu; else break; @@ -584,7 +584,7 @@ static int bperf_sync_counters(struct evsel *evsel) num_cpu = all_cpu_map->nr; for (i = 0; i < num_cpu; i++) { - cpu = all_cpu_map->map[i]; + cpu = all_cpu_map->map[i].cpu; bperf_trigger_reading(evsel->bperf_leader_prog_fd, cpu); } return 0; @@ -605,7 +605,7 @@ static int bperf__disable(struct evsel *evsel) static int bperf__read(struct evsel *evsel) { struct bperf_follower_bpf *skel = evsel->follower_skel; - __u32 num_cpu_bpf = cpu__max_cpu(); + __u32 num_cpu_bpf = cpu__max_cpu().cpu; struct bpf_perf_event_value values[num_cpu_bpf]; int reading_map_fd, err = 0; __u32 i; @@ -615,6 +615,7 @@ static int bperf__read(struct evsel *evsel) reading_map_fd = bpf_map__fd(skel->maps.accum_readings); for (i = 0; i < bpf_map__max_entries(skel->maps.accum_readings); i++) { + struct perf_cpu entry; __u32 cpu; err = bpf_map_lookup_elem(reading_map_fd, &i, values); @@ -624,14 +625,15 @@ static int bperf__read(struct evsel *evsel) case BPERF_FILTER_GLOBAL: assert(i == 0); - perf_cpu_map__for_each_cpu(cpu, j, all_cpu_map) { + perf_cpu_map__for_each_cpu(entry, j, all_cpu_map) { + cpu = entry.cpu; perf_counts(evsel->counts, cpu, 0)->val = values[cpu].counter; perf_counts(evsel->counts, cpu, 0)->ena = values[cpu].enabled; perf_counts(evsel->counts, cpu, 0)->run = values[cpu].running; } break; case BPERF_FILTER_CPU: - cpu = evsel->core.cpus->map[i]; + cpu = evsel->core.cpus->map[i].cpu; perf_counts(evsel->counts, i, 0)->val = values[cpu].counter; perf_counts(evsel->counts, i, 0)->ena = values[cpu].enabled; perf_counts(evsel->counts, i, 0)->run = values[cpu].running; diff --git a/tools/perf/util/bpf_counter_cgroup.c b/tools/perf/util/bpf_counter_cgroup.c index cbc6c2bca488..631e34a0b66f 100644 --- a/tools/perf/util/bpf_counter_cgroup.c +++ b/tools/perf/util/bpf_counter_cgroup.c @@ -48,7 +48,7 @@ static int bperf_load_program(struct evlist *evlist) struct cgroup *cgrp, *leader_cgrp; __u32 i, cpu; __u32 nr_cpus = evlist->core.all_cpus->nr; - int total_cpus = cpu__max_cpu(); + int total_cpus = cpu__max_cpu().cpu; int map_size, map_fd; int prog_fd, err; @@ -125,7 +125,7 @@ static int bperf_load_program(struct evlist *evlist) for (cpu = 0; cpu < nr_cpus; cpu++) { int fd = FD(evsel, cpu); __u32 idx = evsel->core.idx * total_cpus + - evlist->core.all_cpus->map[cpu]; + evlist->core.all_cpus->map[cpu].cpu; err = bpf_map_update_elem(map_fd, &idx, &fd, BPF_ANY); @@ -212,7 +212,7 @@ static int bperf_cgrp__sync_counters(struct evlist *evlist) int prog_fd = bpf_program__fd(skel->progs.trigger_read); for (i = 0; i < nr_cpus; i++) { - cpu = evlist->core.all_cpus->map[i]; + cpu = evlist->core.all_cpus->map[i].cpu; bperf_trigger_reading(prog_fd, cpu); } @@ -245,7 +245,7 @@ static int bperf_cgrp__read(struct evsel *evsel) { struct evlist *evlist = evsel->evlist; int i, cpu, nr_cpus = evlist->core.all_cpus->nr; - int total_cpus = cpu__max_cpu(); + int total_cpus = cpu__max_cpu().cpu; struct perf_counts_values *counts; struct bpf_perf_event_value *values; int reading_map_fd, err = 0; @@ -272,7 +272,7 @@ static int bperf_cgrp__read(struct evsel *evsel) } for (i = 0; i < nr_cpus; i++) { - cpu = evlist->core.all_cpus->map[i]; + cpu = evlist->core.all_cpus->map[i].cpu; counts = perf_counts(evsel->counts, i, 0); counts->val = values[cpu].counter; diff --git a/tools/perf/util/bpf_ftrace.c b/tools/perf/util/bpf_ftrace.c index 28dc4c60c788..d756cc66eef3 100644 --- a/tools/perf/util/bpf_ftrace.c +++ b/tools/perf/util/bpf_ftrace.c @@ -63,7 +63,7 @@ int perf_ftrace__latency_prepare_bpf(struct perf_ftrace *ftrace) fd = bpf_map__fd(skel->maps.cpu_filter); for (i = 0; i < ncpus; i++) { - cpu = perf_cpu_map__cpu(ftrace->evlist->core.cpus, i); + cpu = perf_cpu_map__cpu(ftrace->evlist->core.cpus, i).cpu; bpf_map_update_elem(fd, &cpu, &val, BPF_ANY); } } @@ -122,7 +122,7 @@ int perf_ftrace__latency_read_bpf(struct perf_ftrace *ftrace __maybe_unused, int i, fd, err; u32 idx; u64 *hist; - int ncpus = cpu__max_cpu(); + int ncpus = cpu__max_cpu().cpu; fd = bpf_map__fd(skel->maps.latency); diff --git a/tools/perf/util/cpumap.c b/tools/perf/util/cpumap.c index 48ce583af0ec..12b2243222b0 100644 --- a/tools/perf/util/cpumap.c +++ b/tools/perf/util/cpumap.c @@ -13,8 +13,8 @@ #include #include -static int max_cpu_num; -static int max_present_cpu_num; +static struct perf_cpu max_cpu_num; +static struct perf_cpu max_present_cpu_num; static int max_node_num; /** * The numa node X as read from /sys/devices/system/node/nodeX indexed by the @@ -37,9 +37,9 @@ static struct perf_cpu_map *cpu_map__from_entries(struct cpu_map_entries *cpus) * otherwise it would become 65535. */ if (cpus->cpu[i] == (u16) -1) - map->map[i] = -1; + map->map[i].cpu = -1; else - map->map[i] = (int) cpus->cpu[i]; + map->map[i].cpu = (int) cpus->cpu[i]; } } @@ -58,7 +58,7 @@ static struct perf_cpu_map *cpu_map__from_mask(struct perf_record_record_cpu_map int cpu, i = 0; for_each_set_bit(cpu, mask->mask, nbits) - map->map[i++] = cpu; + map->map[i++].cpu = cpu; } return map; @@ -91,7 +91,7 @@ struct perf_cpu_map *perf_cpu_map__empty_new(int nr) cpus->nr = nr; for (i = 0; i < nr; i++) - cpus->map[i] = -1; + cpus->map[i].cpu = -1; refcount_set(&cpus->refcnt, 1); } @@ -126,13 +126,13 @@ static int cpu__get_topology_int(int cpu, const char *name, int *value) return sysfs__read_int(path, value); } -int cpu__get_socket_id(int cpu) +int cpu__get_socket_id(struct perf_cpu cpu) { - int value, ret = cpu__get_topology_int(cpu, "physical_package_id", &value); + int value, ret = cpu__get_topology_int(cpu.cpu, "physical_package_id", &value); return ret ?: value; } -struct aggr_cpu_id aggr_cpu_id__socket(int cpu, void *data __maybe_unused) +struct aggr_cpu_id aggr_cpu_id__socket(struct perf_cpu cpu, void *data __maybe_unused) { struct aggr_cpu_id id = aggr_cpu_id__empty(); @@ -161,7 +161,8 @@ struct cpu_aggr_map *cpu_aggr_map__new(const struct perf_cpu_map *cpus, aggr_cpu_id_get_t get_id, void *data) { - int cpu, idx; + int idx; + struct perf_cpu cpu; struct cpu_aggr_map *c = cpu_aggr_map__empty_new(cpus->nr); if (!c) @@ -201,14 +202,14 @@ struct cpu_aggr_map *cpu_aggr_map__new(const struct perf_cpu_map *cpus, } -int cpu__get_die_id(int cpu) +int cpu__get_die_id(struct perf_cpu cpu) { - int value, ret = cpu__get_topology_int(cpu, "die_id", &value); + int value, ret = cpu__get_topology_int(cpu.cpu, "die_id", &value); return ret ?: value; } -struct aggr_cpu_id aggr_cpu_id__die(int cpu, void *data) +struct aggr_cpu_id aggr_cpu_id__die(struct perf_cpu cpu, void *data) { struct aggr_cpu_id id; int die; @@ -231,13 +232,13 @@ struct aggr_cpu_id aggr_cpu_id__die(int cpu, void *data) return id; } -int cpu__get_core_id(int cpu) +int cpu__get_core_id(struct perf_cpu cpu) { - int value, ret = cpu__get_topology_int(cpu, "core_id", &value); + int value, ret = cpu__get_topology_int(cpu.cpu, "core_id", &value); return ret ?: value; } -struct aggr_cpu_id aggr_cpu_id__core(int cpu, void *data) +struct aggr_cpu_id aggr_cpu_id__core(struct perf_cpu cpu, void *data) { struct aggr_cpu_id id; int core = cpu__get_core_id(cpu); @@ -256,7 +257,7 @@ struct aggr_cpu_id aggr_cpu_id__core(int cpu, void *data) } -struct aggr_cpu_id aggr_cpu_id__cpu(int cpu, void *data) +struct aggr_cpu_id aggr_cpu_id__cpu(struct perf_cpu cpu, void *data) { struct aggr_cpu_id id; @@ -270,7 +271,7 @@ struct aggr_cpu_id aggr_cpu_id__cpu(int cpu, void *data) } -struct aggr_cpu_id aggr_cpu_id__node(int cpu, void *data __maybe_unused) +struct aggr_cpu_id aggr_cpu_id__node(struct perf_cpu cpu, void *data __maybe_unused) { struct aggr_cpu_id id = aggr_cpu_id__empty(); @@ -318,8 +319,8 @@ static void set_max_cpu_num(void) int ret = -1; /* set up default */ - max_cpu_num = 4096; - max_present_cpu_num = 4096; + max_cpu_num.cpu = 4096; + max_present_cpu_num.cpu = 4096; mnt = sysfs__mountpoint(); if (!mnt) @@ -332,7 +333,7 @@ static void set_max_cpu_num(void) goto out; } - ret = get_max_num(path, &max_cpu_num); + ret = get_max_num(path, &max_cpu_num.cpu); if (ret) goto out; @@ -343,11 +344,11 @@ static void set_max_cpu_num(void) goto out; } - ret = get_max_num(path, &max_present_cpu_num); + ret = get_max_num(path, &max_present_cpu_num.cpu); out: if (ret) - pr_err("Failed to read max cpus, using default of %d\n", max_cpu_num); + pr_err("Failed to read max cpus, using default of %d\n", max_cpu_num.cpu); } /* Determine highest possible node in the system for sparse allocation */ @@ -386,31 +387,31 @@ int cpu__max_node(void) return max_node_num; } -int cpu__max_cpu(void) +struct perf_cpu cpu__max_cpu(void) { - if (unlikely(!max_cpu_num)) + if (unlikely(!max_cpu_num.cpu)) set_max_cpu_num(); return max_cpu_num; } -int cpu__max_present_cpu(void) +struct perf_cpu cpu__max_present_cpu(void) { - if (unlikely(!max_present_cpu_num)) + if (unlikely(!max_present_cpu_num.cpu)) set_max_cpu_num(); return max_present_cpu_num; } -int cpu__get_node(int cpu) +int cpu__get_node(struct perf_cpu cpu) { if (unlikely(cpunode_map == NULL)) { pr_debug("cpu_map not initialized\n"); return -1; } - return cpunode_map[cpu]; + return cpunode_map[cpu.cpu]; } static int init_cpunode_map(void) @@ -420,13 +421,13 @@ static int init_cpunode_map(void) set_max_cpu_num(); set_max_node_num(); - cpunode_map = calloc(max_cpu_num, sizeof(int)); + cpunode_map = calloc(max_cpu_num.cpu, sizeof(int)); if (!cpunode_map) { pr_err("%s: calloc failed\n", __func__); return -1; } - for (i = 0; i < max_cpu_num; i++) + for (i = 0; i < max_cpu_num.cpu; i++) cpunode_map[i] = -1; return 0; @@ -487,35 +488,37 @@ int cpu__setup_cpunode_map(void) size_t cpu_map__snprint(struct perf_cpu_map *map, char *buf, size_t size) { - int i, cpu, start = -1; + int i, start = -1; bool first = true; size_t ret = 0; #define COMMA first ? "" : "," for (i = 0; i < map->nr + 1; i++) { + struct perf_cpu cpu = { .cpu = INT_MAX }; bool last = i == map->nr; - cpu = last ? INT_MAX : map->map[i]; + if (!last) + cpu = map->map[i]; if (start == -1) { start = i; if (last) { ret += snprintf(buf + ret, size - ret, "%s%d", COMMA, - map->map[i]); + map->map[i].cpu); } - } else if (((i - start) != (cpu - map->map[start])) || last) { + } else if (((i - start) != (cpu.cpu - map->map[start].cpu)) || last) { int end = i - 1; if (start == end) { ret += snprintf(buf + ret, size - ret, "%s%d", COMMA, - map->map[start]); + map->map[start].cpu); } else { ret += snprintf(buf + ret, size - ret, "%s%d-%d", COMMA, - map->map[start], map->map[end]); + map->map[start].cpu, map->map[end].cpu); } first = false; start = i; @@ -542,23 +545,23 @@ size_t cpu_map__snprint_mask(struct perf_cpu_map *map, char *buf, size_t size) int i, cpu; char *ptr = buf; unsigned char *bitmap; - int last_cpu = perf_cpu_map__cpu(map, map->nr - 1); + struct perf_cpu last_cpu = perf_cpu_map__cpu(map, map->nr - 1); if (buf == NULL) return 0; - bitmap = zalloc(last_cpu / 8 + 1); + bitmap = zalloc(last_cpu.cpu / 8 + 1); if (bitmap == NULL) { buf[0] = '\0'; return 0; } for (i = 0; i < map->nr; i++) { - cpu = perf_cpu_map__cpu(map, i); + cpu = perf_cpu_map__cpu(map, i).cpu; bitmap[cpu / 8] |= 1 << (cpu % 8); } - for (cpu = last_cpu / 4 * 4; cpu >= 0; cpu -= 4) { + for (cpu = last_cpu.cpu / 4 * 4; cpu >= 0; cpu -= 4) { unsigned char bits = bitmap[cpu / 8]; if (cpu % 8) @@ -594,7 +597,7 @@ bool aggr_cpu_id__equal(const struct aggr_cpu_id *a, const struct aggr_cpu_id *b a->socket == b->socket && a->die == b->die && a->core == b->core && - a->cpu == b->cpu; + a->cpu.cpu == b->cpu.cpu; } bool aggr_cpu_id__is_empty(const struct aggr_cpu_id *a) @@ -604,7 +607,7 @@ bool aggr_cpu_id__is_empty(const struct aggr_cpu_id *a) a->socket == -1 && a->die == -1 && a->core == -1 && - a->cpu == -1; + a->cpu.cpu == -1; } struct aggr_cpu_id aggr_cpu_id__empty(void) @@ -615,7 +618,7 @@ struct aggr_cpu_id aggr_cpu_id__empty(void) .socket = -1, .die = -1, .core = -1, - .cpu = -1 + .cpu = (struct perf_cpu){ .cpu = -1 }, }; return ret; } diff --git a/tools/perf/util/cpumap.h b/tools/perf/util/cpumap.h index b98cd1739677..afc15027d678 100644 --- a/tools/perf/util/cpumap.h +++ b/tools/perf/util/cpumap.h @@ -23,7 +23,7 @@ struct aggr_cpu_id { /** The core id as read from /sys/devices/system/cpu/cpuX/topology/core_id. */ int core; /** CPU aggregation, note there is one CPU for each SMT thread. */ - int cpu; + struct perf_cpu cpu; }; /** A collection of aggr_cpu_id values, the "built" version is sorted and uniqued. */ @@ -48,28 +48,28 @@ const struct perf_cpu_map *cpu_map__online(void); /* thread unsafe */ int cpu__setup_cpunode_map(void); int cpu__max_node(void); -int cpu__max_cpu(void); -int cpu__max_present_cpu(void); +struct perf_cpu cpu__max_cpu(void); +struct perf_cpu cpu__max_present_cpu(void); /** * cpu__get_node - Returns the numa node X as read from * /sys/devices/system/node/nodeX for the given CPU. */ -int cpu__get_node(int cpu); +int cpu__get_node(struct perf_cpu cpu); /** * cpu__get_socket_id - Returns the socket number as read from * /sys/devices/system/cpu/cpuX/topology/physical_package_id for the given CPU. */ -int cpu__get_socket_id(int cpu); +int cpu__get_socket_id(struct perf_cpu cpu); /** * cpu__get_die_id - Returns the die id as read from * /sys/devices/system/cpu/cpuX/topology/die_id for the given CPU. */ -int cpu__get_die_id(int cpu); +int cpu__get_die_id(struct perf_cpu cpu); /** * cpu__get_core_id - Returns the core id as read from * /sys/devices/system/cpu/cpuX/topology/core_id for the given CPU. */ -int cpu__get_core_id(int cpu); +int cpu__get_core_id(struct perf_cpu cpu); /** * cpu_aggr_map__empty_new - Create a cpu_aggr_map of size nr with every entry @@ -77,7 +77,7 @@ int cpu__get_core_id(int cpu); */ struct cpu_aggr_map *cpu_aggr_map__empty_new(int nr); -typedef struct aggr_cpu_id (*aggr_cpu_id_get_t)(int cpu, void *data); +typedef struct aggr_cpu_id (*aggr_cpu_id_get_t)(struct perf_cpu cpu, void *data); /** * cpu_aggr_map__new - Create a cpu_aggr_map with an aggr_cpu_id for each cpu in @@ -98,29 +98,29 @@ struct aggr_cpu_id aggr_cpu_id__empty(void); * the socket for cpu. The function signature is compatible with * aggr_cpu_id_get_t. */ -struct aggr_cpu_id aggr_cpu_id__socket(int cpu, void *data); +struct aggr_cpu_id aggr_cpu_id__socket(struct perf_cpu cpu, void *data); /** * aggr_cpu_id__die - Create an aggr_cpu_id with the die and socket populated * with the die and socket for cpu. The function signature is compatible with * aggr_cpu_id_get_t. */ -struct aggr_cpu_id aggr_cpu_id__die(int cpu, void *data); +struct aggr_cpu_id aggr_cpu_id__die(struct perf_cpu cpu, void *data); /** * aggr_cpu_id__core - Create an aggr_cpu_id with the core, die and socket * populated with the core, die and socket for cpu. The function signature is * compatible with aggr_cpu_id_get_t. */ -struct aggr_cpu_id aggr_cpu_id__core(int cpu, void *data); +struct aggr_cpu_id aggr_cpu_id__core(struct perf_cpu cpu, void *data); /** * aggr_cpu_id__core - Create an aggr_cpu_id with the cpu, core, die and socket * populated with the cpu, core, die and socket for cpu. The function signature * is compatible with aggr_cpu_id_get_t. */ -struct aggr_cpu_id aggr_cpu_id__cpu(int cpu, void *data); +struct aggr_cpu_id aggr_cpu_id__cpu(struct perf_cpu cpu, void *data); /** * aggr_cpu_id__node - Create an aggr_cpu_id with the numa node populated for * cpu. The function signature is compatible with aggr_cpu_id_get_t. */ -struct aggr_cpu_id aggr_cpu_id__node(int cpu, void *data); +struct aggr_cpu_id aggr_cpu_id__node(struct perf_cpu cpu, void *data); #endif /* __PERF_CPUMAP_H */ diff --git a/tools/perf/util/cputopo.c b/tools/perf/util/cputopo.c index 8affb37d90e7..84ca106a3246 100644 --- a/tools/perf/util/cputopo.c +++ b/tools/perf/util/cputopo.c @@ -187,7 +187,7 @@ struct cpu_topology *cpu_topology__new(void) struct perf_cpu_map *map; bool has_die = has_die_topology(); - ncpus = cpu__max_present_cpu(); + ncpus = cpu__max_present_cpu().cpu; /* build online CPU map */ map = perf_cpu_map__new(NULL); @@ -218,7 +218,7 @@ struct cpu_topology *cpu_topology__new(void) tp->core_cpus_list = addr; for (i = 0; i < nr; i++) { - if (!perf_cpu_map__has(map, i)) + if (!perf_cpu_map__has(map, (struct perf_cpu){ .cpu = i })) continue; ret = build_cpu_topology(tp, i); @@ -333,7 +333,7 @@ struct numa_topology *numa_topology__new(void) tp->nr = nr; for (i = 0; i < nr; i++) { - if (load_numa_node(&tp->nodes[i], node_map->map[i])) { + if (load_numa_node(&tp->nodes[i], node_map->map[i].cpu)) { numa_topology__delete(tp); tp = NULL; break; diff --git a/tools/perf/util/env.c b/tools/perf/util/env.c index fd12c0dcaefb..579e44c59914 100644 --- a/tools/perf/util/env.c +++ b/tools/perf/util/env.c @@ -285,13 +285,13 @@ out_enomem: int perf_env__read_cpu_topology_map(struct perf_env *env) { - int cpu, nr_cpus; + int idx, nr_cpus; if (env->cpu != NULL) return 0; if (env->nr_cpus_avail == 0) - env->nr_cpus_avail = cpu__max_present_cpu(); + env->nr_cpus_avail = cpu__max_present_cpu().cpu; nr_cpus = env->nr_cpus_avail; if (nr_cpus == -1) @@ -301,10 +301,12 @@ int perf_env__read_cpu_topology_map(struct perf_env *env) if (env->cpu == NULL) return -ENOMEM; - for (cpu = 0; cpu < nr_cpus; ++cpu) { - env->cpu[cpu].core_id = cpu__get_core_id(cpu); - env->cpu[cpu].socket_id = cpu__get_socket_id(cpu); - env->cpu[cpu].die_id = cpu__get_die_id(cpu); + for (idx = 0; idx < nr_cpus; ++idx) { + struct perf_cpu cpu = { .cpu = idx }; + + env->cpu[idx].core_id = cpu__get_core_id(cpu); + env->cpu[idx].socket_id = cpu__get_socket_id(cpu); + env->cpu[idx].die_id = cpu__get_die_id(cpu); } env->nr_cpus_avail = nr_cpus; @@ -381,7 +383,7 @@ static int perf_env__read_arch(struct perf_env *env) static int perf_env__read_nr_cpus_avail(struct perf_env *env) { if (env->nr_cpus_avail == 0) - env->nr_cpus_avail = cpu__max_present_cpu(); + env->nr_cpus_avail = cpu__max_present_cpu().cpu; return env->nr_cpus_avail ? 0 : -ENOENT; } @@ -487,7 +489,7 @@ const char *perf_env__pmu_mappings(struct perf_env *env) return env->pmu_mappings; } -int perf_env__numa_node(struct perf_env *env, int cpu) +int perf_env__numa_node(struct perf_env *env, struct perf_cpu cpu) { if (!env->nr_numa_map) { struct numa_node *nn; @@ -495,7 +497,7 @@ int perf_env__numa_node(struct perf_env *env, int cpu) for (i = 0; i < env->nr_numa_nodes; i++) { nn = &env->numa_nodes[i]; - nr = max(nr, perf_cpu_map__max(nn->map)); + nr = max(nr, perf_cpu_map__max(nn->map).cpu); } nr++; @@ -514,13 +516,14 @@ int perf_env__numa_node(struct perf_env *env, int cpu) env->nr_numa_map = nr; for (i = 0; i < env->nr_numa_nodes; i++) { - int tmp, j; + struct perf_cpu tmp; + int j; nn = &env->numa_nodes[i]; - perf_cpu_map__for_each_cpu(j, tmp, nn->map) - env->numa_map[j] = i; + perf_cpu_map__for_each_cpu(tmp, j, nn->map) + env->numa_map[tmp.cpu] = i; } } - return cpu >= 0 && cpu < env->nr_numa_map ? env->numa_map[cpu] : -1; + return cpu.cpu >= 0 && cpu.cpu < env->nr_numa_map ? env->numa_map[cpu.cpu] : -1; } diff --git a/tools/perf/util/env.h b/tools/perf/util/env.h index 163e5ec503a2..a3541f98e1fc 100644 --- a/tools/perf/util/env.h +++ b/tools/perf/util/env.h @@ -4,6 +4,7 @@ #include #include +#include "cpumap.h" #include "rwsem.h" struct perf_cpu_map; @@ -170,5 +171,5 @@ struct bpf_prog_info_node *perf_env__find_bpf_prog_info(struct perf_env *env, bool perf_env__insert_btf(struct perf_env *env, struct btf_node *btf_node); struct btf_node *perf_env__find_btf(struct perf_env *env, __u32 btf_id); -int perf_env__numa_node(struct perf_env *env, int cpu); +int perf_env__numa_node(struct perf_env *env, struct perf_cpu cpu); #endif /* __PERF_ENV_H */ diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index 39d294f6c321..11eb95b2106b 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -350,13 +350,13 @@ struct evlist_cpu_iterator evlist__cpu_begin(struct evlist *evlist, struct affin .cpu_map_idx = 0, .evlist_cpu_map_idx = 0, .evlist_cpu_map_nr = perf_cpu_map__nr(evlist->core.all_cpus), - .cpu = -1, + .cpu = (struct perf_cpu){ .cpu = -1}, .affinity = affinity, }; if (itr.affinity) { itr.cpu = perf_cpu_map__cpu(evlist->core.all_cpus, 0); - affinity__set(itr.affinity, itr.cpu); + affinity__set(itr.affinity, itr.cpu.cpu); itr.cpu_map_idx = perf_cpu_map__idx(itr.evsel->core.cpus, itr.cpu); /* * If this CPU isn't in the evsel's cpu map then advance through @@ -385,7 +385,7 @@ void evlist_cpu_iterator__next(struct evlist_cpu_iterator *evlist_cpu_itr) perf_cpu_map__cpu(evlist_cpu_itr->container->core.all_cpus, evlist_cpu_itr->evlist_cpu_map_idx); if (evlist_cpu_itr->affinity) - affinity__set(evlist_cpu_itr->affinity, evlist_cpu_itr->cpu); + affinity__set(evlist_cpu_itr->affinity, evlist_cpu_itr->cpu.cpu); evlist_cpu_itr->cpu_map_idx = perf_cpu_map__idx(evlist_cpu_itr->evsel->core.cpus, evlist_cpu_itr->cpu); @@ -819,7 +819,7 @@ perf_evlist__mmap_cb_get(struct perf_evlist *_evlist, bool overwrite, int idx) static int perf_evlist__mmap_cb_mmap(struct perf_mmap *_map, struct perf_mmap_param *_mp, - int output, int cpu) + int output, struct perf_cpu cpu) { struct mmap *map = container_of(_map, struct mmap, core); struct mmap_params *mp = container_of(_mp, struct mmap_params, core); diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h index 57828ebfcb61..64cba56fbc74 100644 --- a/tools/perf/util/evlist.h +++ b/tools/perf/util/evlist.h @@ -344,7 +344,7 @@ struct evlist_cpu_iterator { /** The number of CPU map entries in evlist->core.all_cpus. */ int evlist_cpu_map_nr; /** The current CPU of the iterator. */ - int cpu; + struct perf_cpu cpu; /** If present, used to set the affinity when switching between CPUs. */ struct affinity *affinity; }; diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 796923c80ff6..7660e0bf3b50 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -1594,7 +1594,7 @@ int __evsel__read_on_cpu(struct evsel *evsel, int cpu_map_idx, int thread, bool static int evsel__match_other_cpu(struct evsel *evsel, struct evsel *other, int cpu_map_idx) { - int cpu; + struct perf_cpu cpu; cpu = perf_cpu_map__cpu(evsel->core.cpus, cpu_map_idx); return perf_cpu_map__idx(other->core.cpus, cpu); @@ -2020,9 +2020,9 @@ retry_open: test_attr__ready(); pr_debug2_peo("sys_perf_event_open: pid %d cpu %d group_fd %d flags %#lx", - pid, cpus->map[idx], group_fd, evsel->open_flags); + pid, cpus->map[idx].cpu, group_fd, evsel->open_flags); - fd = sys_perf_event_open(&evsel->core.attr, pid, cpus->map[idx], + fd = sys_perf_event_open(&evsel->core.attr, pid, cpus->map[idx].cpu, group_fd, evsel->open_flags); FD(evsel, idx, thread) = fd; diff --git a/tools/perf/util/expr.c b/tools/perf/util/expr.c index 666b59baeb70..e808738493e2 100644 --- a/tools/perf/util/expr.c +++ b/tools/perf/util/expr.c @@ -410,7 +410,7 @@ double expr__get_literal(const char *literal) return smt_on() > 0 ? 1.0 : 0.0; if (!strcmp("#num_cpus", literal)) - return cpu__max_present_cpu(); + return cpu__max_present_cpu().cpu; /* * Assume that topology strings are consistent, such as CPUs "0-1" diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index e3c1a532d059..6da12e522edc 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -472,7 +472,7 @@ static int write_nrcpus(struct feat_fd *ff, u32 nrc, nra; int ret; - nrc = cpu__max_present_cpu(); + nrc = cpu__max_present_cpu().cpu; nr = sysconf(_SC_NPROCESSORS_ONLN); if (nr < 0) @@ -1163,7 +1163,7 @@ static int build_caches(struct cpu_cache_level caches[], u32 *cntp) u32 nr, cpu; u16 level; - nr = cpu__max_cpu(); + nr = cpu__max_cpu().cpu; for (cpu = 0; cpu < nr; cpu++) { for (level = 0; level < MAX_CACHE_LVL; level++) { @@ -1195,7 +1195,7 @@ static int build_caches(struct cpu_cache_level caches[], u32 *cntp) static int write_cache(struct feat_fd *ff, struct evlist *evlist __maybe_unused) { - u32 max_caches = cpu__max_cpu() * MAX_CACHE_LVL; + u32 max_caches = cpu__max_cpu().cpu * MAX_CACHE_LVL; struct cpu_cache_level caches[max_caches]; u32 cnt = 0, i, version = 1; int ret; diff --git a/tools/perf/util/mmap.c b/tools/perf/util/mmap.c index 23ecdba9e670..12261ed8c15b 100644 --- a/tools/perf/util/mmap.c +++ b/tools/perf/util/mmap.c @@ -94,7 +94,7 @@ static void perf_mmap__aio_free(struct mmap *map, int idx) } } -static int perf_mmap__aio_bind(struct mmap *map, int idx, int cpu, int affinity) +static int perf_mmap__aio_bind(struct mmap *map, int idx, struct perf_cpu cpu, int affinity) { void *data; size_t mmap_len; @@ -138,7 +138,7 @@ static void perf_mmap__aio_free(struct mmap *map, int idx) } static int perf_mmap__aio_bind(struct mmap *map __maybe_unused, int idx __maybe_unused, - int cpu __maybe_unused, int affinity __maybe_unused) + struct perf_cpu cpu __maybe_unused, int affinity __maybe_unused) { return 0; } @@ -240,7 +240,8 @@ void mmap__munmap(struct mmap *map) static void build_node_mask(int node, struct mmap_cpu_mask *mask) { - int c, cpu, nr_cpus; + int idx, nr_cpus; + struct perf_cpu cpu; const struct perf_cpu_map *cpu_map = NULL; cpu_map = cpu_map__online(); @@ -248,16 +249,16 @@ static void build_node_mask(int node, struct mmap_cpu_mask *mask) return; nr_cpus = perf_cpu_map__nr(cpu_map); - for (c = 0; c < nr_cpus; c++) { - cpu = cpu_map->map[c]; /* map c index to online cpu index */ + for (idx = 0; idx < nr_cpus; idx++) { + cpu = cpu_map->map[idx]; /* map c index to online cpu index */ if (cpu__get_node(cpu) == node) - set_bit(cpu, mask->bits); + set_bit(cpu.cpu, mask->bits); } } static int perf_mmap__setup_affinity_mask(struct mmap *map, struct mmap_params *mp) { - map->affinity_mask.nbits = cpu__max_cpu(); + map->affinity_mask.nbits = cpu__max_cpu().cpu; map->affinity_mask.bits = bitmap_zalloc(map->affinity_mask.nbits); if (!map->affinity_mask.bits) return -1; @@ -265,12 +266,12 @@ static int perf_mmap__setup_affinity_mask(struct mmap *map, struct mmap_params * if (mp->affinity == PERF_AFFINITY_NODE && cpu__max_node() > 1) build_node_mask(cpu__get_node(map->core.cpu), &map->affinity_mask); else if (mp->affinity == PERF_AFFINITY_CPU) - set_bit(map->core.cpu, map->affinity_mask.bits); + set_bit(map->core.cpu.cpu, map->affinity_mask.bits); return 0; } -int mmap__mmap(struct mmap *map, struct mmap_params *mp, int fd, int cpu) +int mmap__mmap(struct mmap *map, struct mmap_params *mp, int fd, struct perf_cpu cpu) { if (perf_mmap__mmap(&map->core, &mp->core, fd, cpu)) { pr_debug2("failed to mmap perf event ring buffer, error %d\n", diff --git a/tools/perf/util/mmap.h b/tools/perf/util/mmap.h index 8e259b9610f8..83f6bd4d4082 100644 --- a/tools/perf/util/mmap.h +++ b/tools/perf/util/mmap.h @@ -7,6 +7,7 @@ #include #include #include +#include #include #include // for cpu_set_t #ifdef HAVE_AIO_SUPPORT @@ -52,7 +53,7 @@ struct mmap_params { struct auxtrace_mmap_params auxtrace_mp; }; -int mmap__mmap(struct mmap *map, struct mmap_params *mp, int fd, int cpu); +int mmap__mmap(struct mmap *map, struct mmap_params *mp, int fd, struct perf_cpu cpu); void mmap__munmap(struct mmap *map); union perf_event *perf_mmap__read_forward(struct mmap *map); diff --git a/tools/perf/util/perf_api_probe.c b/tools/perf/util/perf_api_probe.c index 020411682a3c..734d006d9a8c 100644 --- a/tools/perf/util/perf_api_probe.c +++ b/tools/perf/util/perf_api_probe.c @@ -11,7 +11,7 @@ typedef void (*setup_probe_fn_t)(struct evsel *evsel); -static int perf_do_probe_api(setup_probe_fn_t fn, int cpu, const char *str) +static int perf_do_probe_api(setup_probe_fn_t fn, struct perf_cpu cpu, const char *str) { struct evlist *evlist; struct evsel *evsel; @@ -29,7 +29,7 @@ static int perf_do_probe_api(setup_probe_fn_t fn, int cpu, const char *str) evsel = evlist__first(evlist); while (1) { - fd = sys_perf_event_open(&evsel->core.attr, pid, cpu, -1, flags); + fd = sys_perf_event_open(&evsel->core.attr, pid, cpu.cpu, -1, flags); if (fd < 0) { if (pid == -1 && errno == EACCES) { pid = 0; @@ -43,7 +43,7 @@ static int perf_do_probe_api(setup_probe_fn_t fn, int cpu, const char *str) fn(evsel); - fd = sys_perf_event_open(&evsel->core.attr, pid, cpu, -1, flags); + fd = sys_perf_event_open(&evsel->core.attr, pid, cpu.cpu, -1, flags); if (fd < 0) { if (errno == EINVAL) err = -EINVAL; @@ -61,7 +61,8 @@ static bool perf_probe_api(setup_probe_fn_t fn) { const char *try[] = {"cycles:u", "instructions:u", "cpu-clock:u", NULL}; struct perf_cpu_map *cpus; - int cpu, ret, i = 0; + struct perf_cpu cpu; + int ret, i = 0; cpus = perf_cpu_map__new(NULL); if (!cpus) @@ -136,15 +137,17 @@ bool perf_can_record_cpu_wide(void) .exclude_kernel = 1, }; struct perf_cpu_map *cpus; - int cpu, fd; + struct perf_cpu cpu; + int fd; cpus = perf_cpu_map__new(NULL); if (!cpus) return false; + cpu = cpus->map[0]; perf_cpu_map__put(cpus); - fd = sys_perf_event_open(&attr, -1, cpu, -1, 0); + fd = sys_perf_event_open(&attr, -1, cpu.cpu, -1, 0); if (fd < 0) return false; close(fd); diff --git a/tools/perf/util/python.c b/tools/perf/util/python.c index 7f782a31bda3..95fb53899bcd 100644 --- a/tools/perf/util/python.c +++ b/tools/perf/util/python.c @@ -1057,7 +1057,7 @@ static struct mmap *get_md(struct evlist *evlist, int cpu) for (i = 0; i < evlist->core.nr_mmaps; i++) { struct mmap *md = &evlist->mmap[i]; - if (md->core.cpu == cpu) + if (md->core.cpu.cpu == cpu) return md; } @@ -1443,7 +1443,7 @@ error: * Dummy, to avoid dragging all the test_attr infrastructure in the python * binding. */ -void test_attr__open(struct perf_event_attr *attr, pid_t pid, int cpu, +void test_attr__open(struct perf_event_attr *attr, pid_t pid, struct perf_cpu cpu, int fd, int group_fd, unsigned long flags) { } diff --git a/tools/perf/util/record.c b/tools/perf/util/record.c index bff669b615ee..20461f174991 100644 --- a/tools/perf/util/record.c +++ b/tools/perf/util/record.c @@ -106,7 +106,7 @@ void evlist__config(struct evlist *evlist, struct record_opts *opts, struct call if (opts->group) evlist__set_leader(evlist); - if (evlist->core.cpus->map[0] < 0) + if (evlist->core.cpus->map[0].cpu < 0) opts->no_inherit = true; use_comm_exec = perf_can_comm_exec(); @@ -229,7 +229,8 @@ bool evlist__can_select_event(struct evlist *evlist, const char *str) { struct evlist *temp_evlist; struct evsel *evsel; - int err, fd, cpu; + int err, fd; + struct perf_cpu cpu = { .cpu = 0 }; bool ret = false; pid_t pid = -1; @@ -246,14 +247,16 @@ bool evlist__can_select_event(struct evlist *evlist, const char *str) if (!evlist || perf_cpu_map__empty(evlist->core.cpus)) { struct perf_cpu_map *cpus = perf_cpu_map__new(NULL); - cpu = cpus ? cpus->map[0] : 0; + if (cpus) + cpu = cpus->map[0]; + perf_cpu_map__put(cpus); } else { cpu = evlist->core.cpus->map[0]; } while (1) { - fd = sys_perf_event_open(&evsel->core.attr, pid, cpu, -1, + fd = sys_perf_event_open(&evsel->core.attr, pid, cpu.cpu, -1, perf_event_open_cloexec_flag()); if (fd < 0) { if (pid == -1 && errno == EACCES) { diff --git a/tools/perf/util/scripting-engines/trace-event-python.c b/tools/perf/util/scripting-engines/trace-event-python.c index 0445bee9290f..bd95d60018a9 100644 --- a/tools/perf/util/scripting-engines/trace-event-python.c +++ b/tools/perf/util/scripting-engines/trace-event-python.c @@ -1555,7 +1555,7 @@ static void get_handler_name(char *str, size_t size, } static void -process_stat(struct evsel *counter, int cpu, int thread, u64 tstamp, +process_stat(struct evsel *counter, struct perf_cpu cpu, int thread, u64 tstamp, struct perf_counts_values *count) { PyObject *handler, *t; @@ -1575,7 +1575,7 @@ process_stat(struct evsel *counter, int cpu, int thread, u64 tstamp, return; } - PyTuple_SetItem(t, n++, _PyLong_FromLong(cpu)); + PyTuple_SetItem(t, n++, _PyLong_FromLong(cpu.cpu)); PyTuple_SetItem(t, n++, _PyLong_FromLong(thread)); tuple_set_u64(t, n++, tstamp); @@ -1599,7 +1599,7 @@ static void python_process_stat(struct perf_stat_config *config, int cpu, thread; if (config->aggr_mode == AGGR_GLOBAL) { - process_stat(counter, -1, -1, tstamp, + process_stat(counter, (struct perf_cpu){ .cpu = -1 }, -1, tstamp, &counter->counts->aggr); return; } diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index e1a273048681..f19348dddd55 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -2538,15 +2538,15 @@ int perf_session__cpu_bitmap(struct perf_session *session, } for (i = 0; i < map->nr; i++) { - int cpu = map->map[i]; + struct perf_cpu cpu = map->map[i]; - if (cpu >= nr_cpus) { + if (cpu.cpu >= nr_cpus) { pr_err("Requested CPU %d too large. " - "Consider raising MAX_NR_CPUS\n", cpu); + "Consider raising MAX_NR_CPUS\n", cpu.cpu); goto out_delete_map; } - set_bit(cpu, cpu_bitmap); + set_bit(cpu.cpu, cpu_bitmap); } err = 0; @@ -2598,7 +2598,7 @@ int perf_event__process_id_index(struct perf_session *session, if (!sid) return -ENOENT; sid->idx = e->idx; - sid->cpu = e->cpu; + sid->cpu.cpu = e->cpu; sid->tid = e->tid; } return 0; diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c index ba95379efcfb..5db83e51ceef 100644 --- a/tools/perf/util/stat-display.c +++ b/tools/perf/util/stat-display.c @@ -121,10 +121,10 @@ static void aggr_printout(struct perf_stat_config *config, id.die, config->csv_output ? 0 : -3, id.core, config->csv_sep); - } else if (id.cpu > -1) { + } else if (id.cpu.cpu > -1) { fprintf(config->output, "CPU%*d%s", config->csv_output ? 0 : -7, - id.cpu, config->csv_sep); + id.cpu.cpu, config->csv_sep); } break; case AGGR_THREAD: @@ -331,7 +331,8 @@ static int first_shadow_cpu_map_idx(struct perf_stat_config *config, struct evsel *evsel, const struct aggr_cpu_id *id) { struct perf_cpu_map *cpus = evsel__cpus(evsel); - int cpu, idx; + struct perf_cpu cpu; + int idx; if (config->aggr_mode == AGGR_NONE) return perf_cpu_map__idx(cpus, id->cpu); @@ -513,7 +514,8 @@ static void printout(struct perf_stat_config *config, struct aggr_cpu_id id, int static void aggr_update_shadow(struct perf_stat_config *config, struct evlist *evlist) { - int cpu, idx, s; + int idx, s; + struct perf_cpu cpu; struct aggr_cpu_id s2, id; u64 val; struct evsel *counter; @@ -633,7 +635,8 @@ static void aggr_cb(struct perf_stat_config *config, struct evsel *counter, void *data, bool first) { struct aggr_data *ad = data; - int idx, cpu; + int idx; + struct perf_cpu cpu; struct perf_cpu_map *cpus; struct aggr_cpu_id s2; @@ -666,7 +669,7 @@ static void aggr_cb(struct perf_stat_config *config, static void print_counter_aggrdata(struct perf_stat_config *config, struct evsel *counter, int s, char *prefix, bool metric_only, - bool *first, int cpu) + bool *first, struct perf_cpu cpu) { struct aggr_data ad; FILE *output = config->output; @@ -696,7 +699,7 @@ static void print_counter_aggrdata(struct perf_stat_config *config, fprintf(output, "%s", prefix); uval = val * counter->scale; - if (cpu != -1) + if (cpu.cpu != -1) id = aggr_cpu_id__cpu(cpu, /*data=*/NULL); printout(config, id, nr, counter, uval, @@ -731,8 +734,8 @@ static void print_aggr(struct perf_stat_config *config, first = true; evlist__for_each_entry(evlist, counter) { print_counter_aggrdata(config, counter, s, - prefix, metric_only, - &first, /*cpu=*/-1); + prefix, metric_only, + &first, (struct perf_cpu){ .cpu = -1 }); } if (metric_only) fputc('\n', output); @@ -893,7 +896,8 @@ static void print_counter(struct perf_stat_config *config, FILE *output = config->output; u64 ena, run, val; double uval; - int idx, cpu; + int idx; + struct perf_cpu cpu; struct aggr_cpu_id id; perf_cpu_map__for_each_cpu(cpu, idx, evsel__cpus(counter)) { @@ -921,7 +925,8 @@ static void print_no_aggr_metric(struct perf_stat_config *config, struct evlist *evlist, char *prefix) { - int all_idx, cpu; + int all_idx; + struct perf_cpu cpu; perf_cpu_map__for_each_cpu(cpu, all_idx, evlist->core.cpus) { struct evsel *counter; @@ -1211,7 +1216,8 @@ static void print_percore_thread(struct perf_stat_config *config, struct aggr_cpu_id s2, id; struct perf_cpu_map *cpus; bool first = true; - int idx, cpu; + int idx; + struct perf_cpu cpu; cpus = evsel__cpus(counter); perf_cpu_map__for_each_cpu(cpu, idx, cpus) { @@ -1247,8 +1253,8 @@ static void print_percore(struct perf_stat_config *config, fprintf(output, "%s", prefix); print_counter_aggrdata(config, counter, s, - prefix, metric_only, - &first, /*cpu=*/-1); + prefix, metric_only, + &first, (struct perf_cpu){ .cpu = -1 }); } if (metric_only) diff --git a/tools/perf/util/stat.c b/tools/perf/util/stat.c index 7dbd7c4f3c33..ee6f03481215 100644 --- a/tools/perf/util/stat.c +++ b/tools/perf/util/stat.c @@ -297,7 +297,7 @@ static int check_per_pkg(struct evsel *counter, struct perf_counts_values *vals, { struct hashmap *mask = counter->per_pkg_mask; struct perf_cpu_map *cpus = evsel__cpus(counter); - int cpu = perf_cpu_map__cpu(cpus, cpu_map_idx); + struct perf_cpu cpu = perf_cpu_map__cpu(cpus, cpu_map_idx); int s, d, ret = 0; uint64_t *key; diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h index 691c12fd8976..335d19cc3063 100644 --- a/tools/perf/util/stat.h +++ b/tools/perf/util/stat.h @@ -108,7 +108,7 @@ struct runtime_stat { struct rblist value_list; }; -typedef struct aggr_cpu_id (*aggr_get_id_t)(struct perf_stat_config *config, int cpu); +typedef struct aggr_cpu_id (*aggr_get_id_t)(struct perf_stat_config *config, struct perf_cpu cpu); struct perf_stat_config { enum aggr_mode aggr_mode; diff --git a/tools/perf/util/svghelper.c b/tools/perf/util/svghelper.c index 96f941e01681..4c9f211249db 100644 --- a/tools/perf/util/svghelper.c +++ b/tools/perf/util/svghelper.c @@ -728,7 +728,7 @@ static int str_to_bitmap(char *s, cpumask_t *b, int nr_cpus) int i; int ret = 0; struct perf_cpu_map *m; - int c; + struct perf_cpu c; m = perf_cpu_map__new(s); if (!m) @@ -736,12 +736,12 @@ static int str_to_bitmap(char *s, cpumask_t *b, int nr_cpus) for (i = 0; i < m->nr; i++) { c = m->map[i]; - if (c >= nr_cpus) { + if (c.cpu >= nr_cpus) { ret = -1; break; } - set_bit(c, cpumask_bits(b)); + set_bit(c.cpu, cpumask_bits(b)); } perf_cpu_map__put(m); diff --git a/tools/perf/util/synthetic-events.c b/tools/perf/util/synthetic-events.c index 198982109f0f..c9ba8050cc2b 100644 --- a/tools/perf/util/synthetic-events.c +++ b/tools/perf/util/synthetic-events.c @@ -1191,7 +1191,7 @@ static void synthesize_cpus(struct cpu_map_entries *cpus, cpus->nr = map->nr; for (i = 0; i < map->nr; i++) - cpus->cpu[i] = map->map[i]; + cpus->cpu[i] = map->map[i].cpu; } static void synthesize_mask(struct perf_record_record_cpu_map *mask, @@ -1203,7 +1203,7 @@ static void synthesize_mask(struct perf_record_record_cpu_map *mask, mask->long_size = sizeof(long); for (i = 0; i < map->nr; i++) - set_bit(map->map[i], mask->mask); + set_bit(map->map[i].cpu, mask->mask); } static size_t cpus_size(struct perf_cpu_map *map) @@ -1219,7 +1219,7 @@ static size_t mask_size(struct perf_cpu_map *map, int *max) for (i = 0; i < map->nr; i++) { /* bit position of the cpu is + 1 */ - int bit = map->map[i] + 1; + int bit = map->map[i].cpu + 1; if (bit > *max) *max = bit; @@ -1354,7 +1354,7 @@ int perf_event__synthesize_stat_config(struct perf_tool *tool, } int perf_event__synthesize_stat(struct perf_tool *tool, - u32 cpu, u32 thread, u64 id, + struct perf_cpu cpu, u32 thread, u64 id, struct perf_counts_values *count, perf_event__handler_t process, struct machine *machine) @@ -1366,7 +1366,7 @@ int perf_event__synthesize_stat(struct perf_tool *tool, event.header.misc = 0; event.id = id; - event.cpu = cpu; + event.cpu = cpu.cpu; event.thread = thread; event.val = count->val; event.ena = count->ena; @@ -1763,7 +1763,7 @@ int perf_event__synthesize_id_index(struct perf_tool *tool, perf_event__handler_ } e->idx = sid->idx; - e->cpu = sid->cpu; + e->cpu = sid->cpu.cpu; e->tid = sid->tid; } } diff --git a/tools/perf/util/synthetic-events.h b/tools/perf/util/synthetic-events.h index c931433bacbf..78a0450db164 100644 --- a/tools/perf/util/synthetic-events.h +++ b/tools/perf/util/synthetic-events.h @@ -6,6 +6,7 @@ #include // pid_t #include #include +#include struct auxtrace_record; struct dso; @@ -63,7 +64,7 @@ int perf_event__synthesize_sample(union perf_event *event, u64 type, u64 read_fo int perf_event__synthesize_stat_config(struct perf_tool *tool, struct perf_stat_config *config, perf_event__handler_t process, struct machine *machine); int perf_event__synthesize_stat_events(struct perf_stat_config *config, struct perf_tool *tool, struct evlist *evlist, perf_event__handler_t process, bool attrs); int perf_event__synthesize_stat_round(struct perf_tool *tool, u64 time, u64 type, perf_event__handler_t process, struct machine *machine); -int perf_event__synthesize_stat(struct perf_tool *tool, u32 cpu, u32 thread, u64 id, struct perf_counts_values *count, perf_event__handler_t process, struct machine *machine); +int perf_event__synthesize_stat(struct perf_tool *tool, struct perf_cpu cpu, u32 thread, u64 id, struct perf_counts_values *count, perf_event__handler_t process, struct machine *machine); int perf_event__synthesize_thread_map2(struct perf_tool *tool, struct perf_thread_map *threads, perf_event__handler_t process, struct machine *machine); int perf_event__synthesize_thread_map(struct perf_tool *tool, struct perf_thread_map *threads, perf_event__handler_t process, struct machine *machine, bool needs_mmap, bool mmap_data); int perf_event__synthesize_threads(struct perf_tool *tool, perf_event__handler_t process, struct machine *machine, bool needs_mmap, bool mmap_data, unsigned int nr_threads_synthesize); diff --git a/tools/perf/util/util.h b/tools/perf/util/util.h index 9f0d36ba77f2..9443c29afa52 100644 --- a/tools/perf/util/util.h +++ b/tools/perf/util/util.h @@ -11,6 +11,9 @@ #include #include #include +#ifndef __cplusplus +#include +#endif /* General helper functions */ void usage(const char *err) __noreturn; @@ -66,6 +69,6 @@ extern bool test_attr__enabled; void test_attr__ready(void); void test_attr__init(void); struct perf_event_attr; -void test_attr__open(struct perf_event_attr *attr, pid_t pid, int cpu, +void test_attr__open(struct perf_event_attr *attr, pid_t pid, struct perf_cpu cpu, int fd, int group_fd, unsigned long flags); #endif /* GIT_COMPAT_UTIL_H */ From 0ce05781f4905fcfbbb489519e36be71c7b0bbcc Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Tue, 23 Nov 2021 16:12:30 -0800 Subject: [PATCH 381/493] perf tools: Fix SMT fallback with large core counts strtoull can only read a 64-bit bitmap. On an AMD EPYC core_cpus may look like: 00000000,00000000,00000000,00000001,00000000,00000000,00000000,00000001 and so the sibling wasn't spotted. Fix by writing a simple hweight string parser. Signed-off-by: Ian Rogers Cc: Alexander Shishkin Cc: Andi Kleen Cc: Ingo Molnar Cc: Jiri Olsa Cc: John Garry Cc: Kajol Jain Cc: Kan Liang Cc: Konstantin Khlebnikov Cc: Mark Rutland Cc: Namhyung Kim Cc: Paul Clarke Cc: Peter Zijlstra Cc: Stephane Eranian Link: https://lore.kernel.org/r/20211124001231.3277836-3-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/smt.c | 68 ++++++++++++++++++++++++++++++++++++------- 1 file changed, 58 insertions(+), 10 deletions(-) diff --git a/tools/perf/util/smt.c b/tools/perf/util/smt.c index 34f1b1b1176c..2636be65305a 100644 --- a/tools/perf/util/smt.c +++ b/tools/perf/util/smt.c @@ -5,6 +5,56 @@ #include "api/fs/fs.h" #include "smt.h" +/** + * hweight_str - Returns the number of bits set in str. Stops at first non-hex + * or ',' character. + */ +static int hweight_str(char *str) +{ + int result = 0; + + while (*str) { + switch (*str++) { + case '0': + case ',': + break; + case '1': + case '2': + case '4': + case '8': + result++; + break; + case '3': + case '5': + case '6': + case '9': + case 'a': + case 'A': + case 'c': + case 'C': + result += 2; + break; + case '7': + case 'b': + case 'B': + case 'd': + case 'D': + case 'e': + case 'E': + result += 3; + break; + case 'f': + case 'F': + result += 4; + break; + default: + goto done; + } + } +done: + return result; +} + int smt_on(void) { static bool cached; @@ -15,9 +65,12 @@ int smt_on(void) if (cached) return cached_result; - if (sysfs__read_int("devices/system/cpu/smt/active", &cached_result) >= 0) - goto done; + if (sysfs__read_int("devices/system/cpu/smt/active", &cached_result) >= 0) { + cached = true; + return cached_result; + } + cached_result = 0; ncpu = sysconf(_SC_NPROCESSORS_CONF); for (cpu = 0; cpu < ncpu; cpu++) { unsigned long long siblings; @@ -35,18 +88,13 @@ int smt_on(void) continue; } /* Entry is hex, but does not have 0x, so need custom parser */ - siblings = strtoull(str, NULL, 16); + siblings = hweight_str(str); free(str); - if (hweight64(siblings) > 1) { + if (siblings > 1) { cached_result = 1; - cached = true; break; } } - if (!cached) { - cached_result = 0; -done: - cached = true; - } + cached = true; return cached_result; } From 6dd8646939a770e4ec0220c1c19d6af25c5877b7 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Tue, 23 Nov 2021 16:12:31 -0800 Subject: [PATCH 382/493] perf tools: Probe non-deprecated sysfs path 1st Following Documentation/ABI/stable/sysfs-devices-system-cpu the /sys/devices/system/cpu/cpuX/topology/core_cpus is deprecated in favor of thread_siblings, so probe thread_siblings before falling back on core_cpus. Signed-off-by: Ian Rogers Cc: Alexander Shishkin Cc: Andi Kleen Cc: Ingo Molnar Cc: Jiri Olsa Cc: John Garry Cc: Kajol Jain Cc: Kan Liang Cc: Konstantin Khlebnikov Cc: Mark Rutland Cc: Namhyung Kim Cc: Paul Clarke Cc: Peter Zijlstra Cc: Stephane Eranian Link: https://lore.kernel.org/r/20211124001231.3277836-4-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/smt.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/tools/perf/util/smt.c b/tools/perf/util/smt.c index 2636be65305a..2b0a36ebf27a 100644 --- a/tools/perf/util/smt.c +++ b/tools/perf/util/smt.c @@ -79,11 +79,10 @@ int smt_on(void) char fn[256]; snprintf(fn, sizeof fn, - "devices/system/cpu/cpu%d/topology/core_cpus", cpu); + "devices/system/cpu/cpu%d/topology/thread_siblings", cpu); if (sysfs__read_str(fn, &str, &strlen) < 0) { snprintf(fn, sizeof fn, - "devices/system/cpu/cpu%d/topology/thread_siblings", - cpu); + "devices/system/cpu/cpu%d/topology/core_cpus", cpu); if (sysfs__read_str(fn, &str, &strlen) < 0) continue; } From f56ef30a31d388663e78b9be687d67748c9b7297 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Tue, 23 Nov 2021 16:12:28 -0800 Subject: [PATCH 383/493] perf expr: Add debug logging for literals Useful for diagnosing problems with metrics. Signed-off-by: Ian Rogers Cc: Alexander Shishkin Cc: Andi Kleen Cc: Jiri Olsa Cc: John Garry Cc: Kajol Jain Cc: Kan Liang Cc: Konstantin Khlebnikov Cc: Mark Rutland Cc: Namhyung Kim Cc: Paul Clarke Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lore.kernel.org/lkml/20211124001231.3277836-1-irogers@google.com [ Fixed up perf_cpu conflict, i.e. we need to append ".cpu" to cpu__max_present_cpu() result ] Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/expr.c | 37 +++++++++++++++++++++++++------------ 1 file changed, 25 insertions(+), 12 deletions(-) diff --git a/tools/perf/util/expr.c b/tools/perf/util/expr.c index e808738493e2..c94fb9bef919 100644 --- a/tools/perf/util/expr.c +++ b/tools/perf/util/expr.c @@ -405,12 +405,17 @@ double expr_id_data__source_count(const struct expr_id_data *data) double expr__get_literal(const char *literal) { static struct cpu_topology *topology; + double result = NAN; - if (!strcmp("#smt_on", literal)) - return smt_on() > 0 ? 1.0 : 0.0; + if (!strcmp("#smt_on", literal)) { + result = smt_on() > 0 ? 1.0 : 0.0; + goto out; + } - if (!strcmp("#num_cpus", literal)) - return cpu__max_present_cpu().cpu; + if (!strcmp("#num_cpus", literal)) { + result = cpu__max_present_cpu().cpu; + goto out; + } /* * Assume that topology strings are consistent, such as CPUs "0-1" @@ -422,16 +427,24 @@ double expr__get_literal(const char *literal) topology = cpu_topology__new(); if (!topology) { pr_err("Error creating CPU topology"); - return NAN; + goto out; } } - if (!strcmp("#num_packages", literal)) - return topology->package_cpus_lists; - if (!strcmp("#num_dies", literal)) - return topology->die_cpus_lists; - if (!strcmp("#num_cores", literal)) - return topology->core_cpus_lists; + if (!strcmp("#num_packages", literal)) { + result = topology->package_cpus_lists; + goto out; + } + if (!strcmp("#num_dies", literal)) { + result = topology->die_cpus_lists; + goto out; + } + if (!strcmp("#num_cores", literal)) { + result = topology->core_cpus_lists; + goto out; + } pr_err("Unrecognized literal '%s'", literal); - return NAN; +out: + pr_debug2("literal: %s = %f\n", literal, result); + return result; } From c0dd94558d0e473aa92254e1c48a47900c911e69 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Thu, 25 Nov 2021 23:13:05 -0800 Subject: [PATCH 384/493] perf pmu-events: Don't lower case MetricExpr This patch changes MetricExpr to be written out in the same case. This enables events in metrics to use modifiers like 'G' which currently yield parse errors when made lower case. To keep tests passing the literal #smt_on is compared in a non-case sensitive way - #SMT_on is present in at least SkylakeX metrics. Signed-off-by: Ian Rogers Cc: Alexander Shishkin Cc: Andi Kleen Cc: Jiri Olsa Cc: John Garry Cc: Kajol Jain Cc: Kan Liang Cc: Mark Rutland Cc: Namhyung Kim Cc: Paul Clarke Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lore.kernel.org/lkml/20211126071305.3733878-1-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/pmu-events/jevents.c | 2 -- tools/perf/util/expr.c | 2 +- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/tools/perf/pmu-events/jevents.c b/tools/perf/pmu-events/jevents.c index 2e7c4153875b..1a57c3f81dd4 100644 --- a/tools/perf/pmu-events/jevents.c +++ b/tools/perf/pmu-events/jevents.c @@ -672,8 +672,6 @@ static int json_events(const char *fn, addfield(map, &je.metric_constraint, "", "", val); } else if (json_streq(map, field, "MetricExpr")) { addfield(map, &je.metric_expr, "", "", val); - for (s = je.metric_expr; *s; s++) - *s = tolower(*s); } else if (json_streq(map, field, "ArchStdEvent")) { addfield(map, &arch_std, "", "", val); for (s = arch_std; *s; s++) diff --git a/tools/perf/util/expr.c b/tools/perf/util/expr.c index c94fb9bef919..675f318ce7c1 100644 --- a/tools/perf/util/expr.c +++ b/tools/perf/util/expr.c @@ -407,7 +407,7 @@ double expr__get_literal(const char *literal) static struct cpu_topology *topology; double result = NAN; - if (!strcmp("#smt_on", literal)) { + if (!strcasecmp("#smt_on", literal)) { result = smt_on() > 0 ? 1.0 : 0.0; goto out; } From 415b4b6c447ae03cb1d9cfc91df39616c92f15e2 Mon Sep 17 00:00:00 2001 From: kernel test robot Date: Thu, 6 Jan 2022 01:45:56 +0800 Subject: [PATCH 385/493] ACPI: PCC: pcc_ctx can be static drivers/acpi/acpi_pcc.c:34:22: warning: symbol 'pcc_ctx' was not declared. Should it be static? Reported-by: kernel test robot Signed-off-by: kernel test robot Reviewed-by: Sudeep Holla Signed-off-by: Rafael J. Wysocki --- drivers/acpi/acpi_pcc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/acpi/acpi_pcc.c b/drivers/acpi/acpi_pcc.c index 41e3ebd204ff..a12b55d81209 100644 --- a/drivers/acpi/acpi_pcc.c +++ b/drivers/acpi/acpi_pcc.c @@ -31,7 +31,7 @@ struct pcc_data { struct acpi_pcc_info ctx; }; -struct acpi_pcc_info pcc_ctx; +static struct acpi_pcc_info pcc_ctx; static void pcc_rx_callback(struct mbox_client *cl, void *m) { From 3f4b32511a77bc5a05cfbf26fec94c4e1b1cf46a Mon Sep 17 00:00:00 2001 From: Paul Cercueil Date: Fri, 7 Jan 2022 18:17:18 +0000 Subject: [PATCH 386/493] PM: core: Remove DEFINE_UNIVERSAL_DEV_PM_OPS() macro The deprecated UNIVERSAL_DEV_PM_OPS() macro uses the provided callbacks for both runtime PM and system sleep, which is very likely to be a mistake, as a system sleep can be triggered while a given device is already PM-suspended, which would cause the suspend callback to be called twice. The amount of users of UNIVERSAL_DEV_PM_OPS() is also tiny (16 occurences) compared to the number of places where SET_SYSTEM_SLEEP_PM_OPS() is used with pm_runtime_force_suspend() and pm_runtime_force_resume(), which makes me think that none of these cases are actually valid. As the new macro DEFINE_UNIVERSAL_DEV_PM_OPS() which was introduced to replace UNIVERSAL_DEV_PM_OPS() is currently unused, remove it before someone starts to use it in yet another invalid case. Signed-off-by: Paul Cercueil Acked-by: Jonathan Cameron Reviewed-by: Ulf Hansson Signed-off-by: Rafael J. Wysocki --- include/linux/pm.h | 21 ++++++++------------- 1 file changed, 8 insertions(+), 13 deletions(-) diff --git a/include/linux/pm.h b/include/linux/pm.h index e1e9402180b9..02f059d814bb 100644 --- a/include/linux/pm.h +++ b/include/linux/pm.h @@ -366,6 +366,12 @@ static const struct dev_pm_ops name = { \ SYSTEM_SLEEP_PM_OPS(suspend_fn, resume_fn) \ } +/* Deprecated. Use DEFINE_SIMPLE_DEV_PM_OPS() instead. */ +#define SIMPLE_DEV_PM_OPS(name, suspend_fn, resume_fn) \ +const struct dev_pm_ops __maybe_unused name = { \ + SET_SYSTEM_SLEEP_PM_OPS(suspend_fn, resume_fn) \ +} + /* * Use this for defining a set of PM operations to be used in all situations * (system suspend, hibernation or runtime PM). @@ -378,20 +384,9 @@ static const struct dev_pm_ops name = { \ * suspend and "early" resume callback pointers, .suspend_late() and * .resume_early(), to the same routines as .runtime_suspend() and * .runtime_resume(), respectively (and analogously for hibernation). + * + * Deprecated. You most likely don't want this macro. */ -#define DEFINE_UNIVERSAL_DEV_PM_OPS(name, suspend_fn, resume_fn, idle_fn) \ -static const struct dev_pm_ops name = { \ - SYSTEM_SLEEP_PM_OPS(suspend_fn, resume_fn) \ - RUNTIME_PM_OPS(suspend_fn, resume_fn, idle_fn) \ -} - -/* Deprecated. Use DEFINE_SIMPLE_DEV_PM_OPS() instead. */ -#define SIMPLE_DEV_PM_OPS(name, suspend_fn, resume_fn) \ -const struct dev_pm_ops __maybe_unused name = { \ - SET_SYSTEM_SLEEP_PM_OPS(suspend_fn, resume_fn) \ -} - -/* Deprecated. Use DEFINE_UNIVERSAL_DEV_PM_OPS() instead. */ #define UNIVERSAL_DEV_PM_OPS(name, suspend_fn, resume_fn, idle_fn) \ const struct dev_pm_ops __maybe_unused name = { \ SET_SYSTEM_SLEEP_PM_OPS(suspend_fn, resume_fn) \ From 52cc1d7f9786d2be44a3ab9b5b48416a7618e713 Mon Sep 17 00:00:00 2001 From: Paul Cercueil Date: Fri, 7 Jan 2022 18:17:19 +0000 Subject: [PATCH 387/493] PM: core: Remove static qualifier in DEFINE_SIMPLE_DEV_PM_OPS macro Keep this macro in line with the other ones. This makes it possible to use them in the cases where the underlying dev_pm_ops structure is exported. Restore the "static" qualifier in the two drivers where the DEFINE_SIMPLE_DEV_PM_OPS macro was used. Signed-off-by: Paul Cercueil Acked-by: Jonathan Cameron Reviewed-by: Ulf Hansson Signed-off-by: Rafael J. Wysocki --- drivers/mmc/host/jz4740_mmc.c | 4 ++-- drivers/mmc/host/mxcmmc.c | 2 +- include/linux/pm.h | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/mmc/host/jz4740_mmc.c b/drivers/mmc/host/jz4740_mmc.c index bb612fce7ead..4ac87b0797bb 100644 --- a/drivers/mmc/host/jz4740_mmc.c +++ b/drivers/mmc/host/jz4740_mmc.c @@ -1113,8 +1113,8 @@ static int jz4740_mmc_resume(struct device *dev) return pinctrl_select_default_state(dev); } -DEFINE_SIMPLE_DEV_PM_OPS(jz4740_mmc_pm_ops, jz4740_mmc_suspend, - jz4740_mmc_resume); +static DEFINE_SIMPLE_DEV_PM_OPS(jz4740_mmc_pm_ops, jz4740_mmc_suspend, + jz4740_mmc_resume); static struct platform_driver jz4740_mmc_driver = { .probe = jz4740_mmc_probe, diff --git a/drivers/mmc/host/mxcmmc.c b/drivers/mmc/host/mxcmmc.c index 98c218bd6669..40b6878bea6c 100644 --- a/drivers/mmc/host/mxcmmc.c +++ b/drivers/mmc/host/mxcmmc.c @@ -1210,7 +1210,7 @@ static int mxcmci_resume(struct device *dev) return ret; } -DEFINE_SIMPLE_DEV_PM_OPS(mxcmci_pm_ops, mxcmci_suspend, mxcmci_resume); +static DEFINE_SIMPLE_DEV_PM_OPS(mxcmci_pm_ops, mxcmci_suspend, mxcmci_resume); static struct platform_driver mxcmci_driver = { .probe = mxcmci_probe, diff --git a/include/linux/pm.h b/include/linux/pm.h index 02f059d814bb..8e13387e70ec 100644 --- a/include/linux/pm.h +++ b/include/linux/pm.h @@ -362,7 +362,7 @@ struct dev_pm_ops { * to RAM and hibernation. */ #define DEFINE_SIMPLE_DEV_PM_OPS(name, suspend_fn, resume_fn) \ -static const struct dev_pm_ops name = { \ +const struct dev_pm_ops name = { \ SYSTEM_SLEEP_PM_OPS(suspend_fn, resume_fn) \ } From 0ae101fdd3297b7165755340e05386f1e1379709 Mon Sep 17 00:00:00 2001 From: Paul Cercueil Date: Fri, 7 Jan 2022 18:17:20 +0000 Subject: [PATCH 388/493] PM: core: Add EXPORT[_GPL]_SIMPLE_DEV_PM_OPS macros These macros are defined conditionally, according to CONFIG_PM: - if CONFIG_PM is enabled, these macros resolve to DEFINE_SIMPLE_DEV_PM_OPS(), and the dev_pm_ops symbol will be exported. - if CONFIG_PM is disabled, these macros will result in a dummy static dev_pm_ops to be created with the __maybe_unused flag. The dev_pm_ops will then be discarded by the compiler, along with the provided callback functions if they are not used anywhere else. In the second case, the symbol is not exported, which should be perfectly fine - users of the symbol should all use the pm_ptr() or pm_sleep_ptr() macro, so the dev_pm_ops marked as "extern" in the client's code will never be accessed. Signed-off-by: Paul Cercueil Acked-by: Jonathan Cameron Reviewed-by: Ulf Hansson Signed-off-by: Rafael J. Wysocki --- include/linux/pm.h | 35 ++++++++++++++++++++++++++++++++--- 1 file changed, 32 insertions(+), 3 deletions(-) diff --git a/include/linux/pm.h b/include/linux/pm.h index 8e13387e70ec..8279af2c538a 100644 --- a/include/linux/pm.h +++ b/include/linux/pm.h @@ -8,6 +8,7 @@ #ifndef _LINUX_PM_H #define _LINUX_PM_H +#include #include #include #include @@ -357,14 +358,42 @@ struct dev_pm_ops { #define SET_RUNTIME_PM_OPS(suspend_fn, resume_fn, idle_fn) #endif +#define _DEFINE_DEV_PM_OPS(name, \ + suspend_fn, resume_fn, \ + runtime_suspend_fn, runtime_resume_fn, idle_fn) \ +const struct dev_pm_ops name = { \ + SYSTEM_SLEEP_PM_OPS(suspend_fn, resume_fn) \ + RUNTIME_PM_OPS(runtime_suspend_fn, runtime_resume_fn, idle_fn) \ +} + +#ifdef CONFIG_PM +#define _EXPORT_DEV_PM_OPS(name, suspend_fn, resume_fn, runtime_suspend_fn, \ + runtime_resume_fn, idle_fn, sec) \ + _DEFINE_DEV_PM_OPS(name, suspend_fn, resume_fn, runtime_suspend_fn, \ + runtime_resume_fn, idle_fn); \ + _EXPORT_SYMBOL(name, sec) +#else +#define _EXPORT_DEV_PM_OPS(name, suspend_fn, resume_fn, runtime_suspend_fn, \ + runtime_resume_fn, idle_fn, sec) \ +static __maybe_unused _DEFINE_DEV_PM_OPS(__static_##name, suspend_fn, \ + resume_fn, runtime_suspend_fn, \ + runtime_resume_fn, idle_fn) +#endif + /* * Use this if you want to use the same suspend and resume callbacks for suspend * to RAM and hibernation. + * + * If the underlying dev_pm_ops struct symbol has to be exported, use + * EXPORT_SIMPLE_DEV_PM_OPS() or EXPORT_GPL_SIMPLE_DEV_PM_OPS() instead. */ #define DEFINE_SIMPLE_DEV_PM_OPS(name, suspend_fn, resume_fn) \ -const struct dev_pm_ops name = { \ - SYSTEM_SLEEP_PM_OPS(suspend_fn, resume_fn) \ -} + _DEFINE_DEV_PM_OPS(name, suspend_fn, resume_fn, NULL, NULL, NULL) + +#define EXPORT_SIMPLE_DEV_PM_OPS(name, suspend_fn, resume_fn) \ + _EXPORT_DEV_PM_OPS(name, suspend_fn, resume_fn, NULL, NULL, NULL, "") +#define EXPORT_GPL_SIMPLE_DEV_PM_OPS(name, suspend_fn, resume_fn) \ + _EXPORT_DEV_PM_OPS(name, suspend_fn, resume_fn, NULL, NULL, NULL, "_gpl") /* Deprecated. Use DEFINE_SIMPLE_DEV_PM_OPS() instead. */ #define SIMPLE_DEV_PM_OPS(name, suspend_fn, resume_fn) \ From 9d8619190031af0a314bee865262d8975473e4dd Mon Sep 17 00:00:00 2001 From: Paul Cercueil Date: Fri, 7 Jan 2022 18:17:21 +0000 Subject: [PATCH 389/493] PM: runtime: Add DEFINE_RUNTIME_DEV_PM_OPS() macro A lot of drivers create a dev_pm_ops struct with the system sleep suspend/resume callbacks set to pm_runtime_force_suspend() and pm_runtime_force_resume(). These drivers can now use the DEFINE_RUNTIME_DEV_PM_OPS() macro, which will use pm_runtime_force_{suspend,resume}() as the system sleep callbacks, while having the same dead code removal characteristic that is already provided by DEFINE_SIMPLE_DEV_PM_OPS(). Signed-off-by: Paul Cercueil Acked-by: Jonathan Cameron Reviewed-by: Ulf Hansson Signed-off-by: Rafael J. Wysocki --- include/linux/pm.h | 3 ++- include/linux/pm_runtime.h | 14 ++++++++++++++ 2 files changed, 16 insertions(+), 1 deletion(-) diff --git a/include/linux/pm.h b/include/linux/pm.h index 8279af2c538a..f7d2be686359 100644 --- a/include/linux/pm.h +++ b/include/linux/pm.h @@ -414,7 +414,8 @@ const struct dev_pm_ops __maybe_unused name = { \ * .resume_early(), to the same routines as .runtime_suspend() and * .runtime_resume(), respectively (and analogously for hibernation). * - * Deprecated. You most likely don't want this macro. + * Deprecated. You most likely don't want this macro. Use + * DEFINE_RUNTIME_DEV_PM_OPS() instead. */ #define UNIVERSAL_DEV_PM_OPS(name, suspend_fn, resume_fn, idle_fn) \ const struct dev_pm_ops __maybe_unused name = { \ diff --git a/include/linux/pm_runtime.h b/include/linux/pm_runtime.h index 016de5776b6d..4af454d29281 100644 --- a/include/linux/pm_runtime.h +++ b/include/linux/pm_runtime.h @@ -22,6 +22,20 @@ usage_count */ #define RPM_AUTO 0x08 /* Use autosuspend_delay */ +/* + * Use this for defining a set of PM operations to be used in all situations + * (system suspend, hibernation or runtime PM). + * + * Note that the behaviour differs from the deprecated UNIVERSAL_DEV_PM_OPS() + * macro, which uses the provided callbacks for both runtime PM and system + * sleep, while DEFINE_RUNTIME_DEV_PM_OPS() uses pm_runtime_force_suspend() + * and pm_runtime_force_resume() for its system sleep callbacks. + */ +#define DEFINE_RUNTIME_DEV_PM_OPS(name, suspend_fn, resume_fn, idle_fn) \ + _DEFINE_DEV_PM_OPS(name, pm_runtime_force_suspend, \ + pm_runtime_force_resume, suspend_fn, \ + resume_fn, idle_fn) + #ifdef CONFIG_PM extern struct workqueue_struct *pm_wq; From d59ff7d9d84b03d22c5107f794e28fc8e1fce3a6 Mon Sep 17 00:00:00 2001 From: Paul Cercueil Date: Fri, 7 Jan 2022 18:17:22 +0000 Subject: [PATCH 390/493] PM: runtime: Add EXPORT[_GPL]_RUNTIME_DEV_PM_OPS macros Similar to EXPORT[_GPL]_SIMPLE_DEV_PM_OPS, but for users with runtime-PM suspend/resume callbacks. Signed-off-by: Paul Cercueil Acked-by: Jonathan Cameron Reviewed-by: Ulf Hansson Signed-off-by: Rafael J. Wysocki --- include/linux/pm_runtime.h | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/include/linux/pm_runtime.h b/include/linux/pm_runtime.h index 4af454d29281..9f09601c465a 100644 --- a/include/linux/pm_runtime.h +++ b/include/linux/pm_runtime.h @@ -30,12 +30,22 @@ * macro, which uses the provided callbacks for both runtime PM and system * sleep, while DEFINE_RUNTIME_DEV_PM_OPS() uses pm_runtime_force_suspend() * and pm_runtime_force_resume() for its system sleep callbacks. + * + * If the underlying dev_pm_ops struct symbol has to be exported, use + * EXPORT_RUNTIME_DEV_PM_OPS() or EXPORT_GPL_RUNTIME_DEV_PM_OPS() instead. */ #define DEFINE_RUNTIME_DEV_PM_OPS(name, suspend_fn, resume_fn, idle_fn) \ _DEFINE_DEV_PM_OPS(name, pm_runtime_force_suspend, \ pm_runtime_force_resume, suspend_fn, \ resume_fn, idle_fn) +#define EXPORT_RUNTIME_DEV_PM_OPS(name, suspend_fn, resume_fn, idle_fn) \ + _EXPORT_DEV_PM_OPS(name, pm_runtime_force_suspend, pm_runtime_force_resume, \ + suspend_fn, resume_fn, idle_fn, "") +#define EXPORT_GPL_RUNTIME_DEV_PM_OPS(name, suspend_fn, resume_fn, idle_fn) \ + _EXPORT_DEV_PM_OPS(name, pm_runtime_force_suspend, pm_runtime_force_resume, \ + suspend_fn, resume_fn, idle_fn, "_gpl") + #ifdef CONFIG_PM extern struct workqueue_struct *pm_wq; From 5865918fe49ed3cb9d7b5d21f41aff8a68fbceb1 Mon Sep 17 00:00:00 2001 From: Paul Cercueil Date: Fri, 7 Jan 2022 18:17:23 +0000 Subject: [PATCH 391/493] iio: pressure: bmp280: Use new PM macros Use the new EXPORT_RUNTIME_DEV_PM_OPS() macro. It allows the underlying dev_pm_ops struct as well as the suspend/resume callbacks to be detected as dead code in the case where CONFIG_PM is disabled, without having to wrap everything inside #ifdef CONFIG_PM guards. Signed-off-by: Paul Cercueil Acked-by: Jonathan Cameron Reviewed-by: Ulf Hansson Signed-off-by: Rafael J. Wysocki --- drivers/iio/pressure/bmp280-core.c | 11 ++--------- drivers/iio/pressure/bmp280-i2c.c | 2 +- drivers/iio/pressure/bmp280-spi.c | 2 +- 3 files changed, 4 insertions(+), 11 deletions(-) diff --git a/drivers/iio/pressure/bmp280-core.c b/drivers/iio/pressure/bmp280-core.c index 6b7da40f99c8..bf8167f43c56 100644 --- a/drivers/iio/pressure/bmp280-core.c +++ b/drivers/iio/pressure/bmp280-core.c @@ -1138,7 +1138,6 @@ int bmp280_common_probe(struct device *dev, } EXPORT_SYMBOL(bmp280_common_probe); -#ifdef CONFIG_PM static int bmp280_runtime_suspend(struct device *dev) { struct iio_dev *indio_dev = dev_get_drvdata(dev); @@ -1159,15 +1158,9 @@ static int bmp280_runtime_resume(struct device *dev) usleep_range(data->start_up_time, data->start_up_time + 100); return data->chip_info->chip_config(data); } -#endif /* CONFIG_PM */ -const struct dev_pm_ops bmp280_dev_pm_ops = { - SET_SYSTEM_SLEEP_PM_OPS(pm_runtime_force_suspend, - pm_runtime_force_resume) - SET_RUNTIME_PM_OPS(bmp280_runtime_suspend, - bmp280_runtime_resume, NULL) -}; -EXPORT_SYMBOL(bmp280_dev_pm_ops); +EXPORT_RUNTIME_DEV_PM_OPS(bmp280_dev_pm_ops, bmp280_runtime_suspend, + bmp280_runtime_resume, NULL); MODULE_AUTHOR("Vlad Dogaru "); MODULE_DESCRIPTION("Driver for Bosch Sensortec BMP180/BMP280 pressure and temperature sensor"); diff --git a/drivers/iio/pressure/bmp280-i2c.c b/drivers/iio/pressure/bmp280-i2c.c index 8b03ea15c0d0..35045bd92846 100644 --- a/drivers/iio/pressure/bmp280-i2c.c +++ b/drivers/iio/pressure/bmp280-i2c.c @@ -58,7 +58,7 @@ static struct i2c_driver bmp280_i2c_driver = { .driver = { .name = "bmp280", .of_match_table = bmp280_of_i2c_match, - .pm = &bmp280_dev_pm_ops, + .pm = pm_ptr(&bmp280_dev_pm_ops), }, .probe = bmp280_i2c_probe, .id_table = bmp280_i2c_id, diff --git a/drivers/iio/pressure/bmp280-spi.c b/drivers/iio/pressure/bmp280-spi.c index 625b86878ad8..41f6cc56d229 100644 --- a/drivers/iio/pressure/bmp280-spi.c +++ b/drivers/iio/pressure/bmp280-spi.c @@ -109,7 +109,7 @@ static struct spi_driver bmp280_spi_driver = { .driver = { .name = "bmp280", .of_match_table = bmp280_of_spi_match, - .pm = &bmp280_dev_pm_ops, + .pm = pm_ptr(&bmp280_dev_pm_ops), }, .id_table = bmp280_spi_id, .probe = bmp280_spi_probe, From 13462ba1815db5a96891293a9cfaa2451f7bd623 Mon Sep 17 00:00:00 2001 From: Tom Rix Date: Sat, 8 Jan 2022 07:09:48 -0800 Subject: [PATCH 392/493] i3c: master: dw: check return of dw_i3c_master_get_free_pos() Clang static analysis reports this problem dw-i3c-master.c:799:9: warning: The result of the left shift is undefined because the left operand is negative COMMAND_PORT_DEV_INDEX(pos) | ^~~~~~~~~~~~~~~~~~~~~~~~~~~ pos can be negative because dw_i3c_master_get_free_pos() can return an error. So check for an error. Fixes: 1dd728f5d4d4 ("i3c: master: Add driver for Synopsys DesignWare IP") Signed-off-by: Tom Rix Signed-off-by: Alexandre Belloni Link: https://lore.kernel.org/r/20220108150948.3988790-1-trix@redhat.com --- drivers/i3c/master/dw-i3c-master.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/i3c/master/dw-i3c-master.c b/drivers/i3c/master/dw-i3c-master.c index 03a368da51b9..51a8608203de 100644 --- a/drivers/i3c/master/dw-i3c-master.c +++ b/drivers/i3c/master/dw-i3c-master.c @@ -793,6 +793,10 @@ static int dw_i3c_master_daa(struct i3c_master_controller *m) return -ENOMEM; pos = dw_i3c_master_get_free_pos(master); + if (pos < 0) { + dw_i3c_master_free_xfer(xfer); + return pos; + } cmd = &xfer->cmds[0]; cmd->cmd_hi = 0x1; cmd->cmd_lo = COMMAND_PORT_DEV_COUNT(master->maxdevs - pos) | From 35cb8c713a496e8c114eed5e2a5a30b359876df2 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Sun, 9 May 2021 10:19:37 -0300 Subject: [PATCH 393/493] tools arch: Update arch/x86/lib/mem{cpy,set}_64.S copies used in 'perf bench mem memcpy' To bring in the change made in this cset: f94909ceb1ed4bfd ("x86: Prepare asm files for straight-line-speculation") It silences these perf tools build warnings, no change in the tools: Warning: Kernel ABI header at 'tools/arch/x86/lib/memcpy_64.S' differs from latest version at 'arch/x86/lib/memcpy_64.S' diff -u tools/arch/x86/lib/memcpy_64.S arch/x86/lib/memcpy_64.S Warning: Kernel ABI header at 'tools/arch/x86/lib/memset_64.S' differs from latest version at 'arch/x86/lib/memset_64.S' diff -u tools/arch/x86/lib/memset_64.S arch/x86/lib/memset_64.S The code generated was checked before and after using 'objdump -d /tmp/build/perf/bench/mem-memcpy-x86-64-asm.o', no changes. Cc: Borislav Petkov Cc: Peter Zijlstra Signed-off-by: Arnaldo Carvalho de Melo --- tools/arch/x86/lib/memcpy_64.S | 12 ++++++------ tools/arch/x86/lib/memset_64.S | 6 +++--- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/tools/arch/x86/lib/memcpy_64.S b/tools/arch/x86/lib/memcpy_64.S index 1cc9da6e29c7..59cf2343f3d9 100644 --- a/tools/arch/x86/lib/memcpy_64.S +++ b/tools/arch/x86/lib/memcpy_64.S @@ -39,7 +39,7 @@ SYM_FUNC_START_WEAK(memcpy) rep movsq movl %edx, %ecx rep movsb - ret + RET SYM_FUNC_END(memcpy) SYM_FUNC_END_ALIAS(__memcpy) EXPORT_SYMBOL(memcpy) @@ -53,7 +53,7 @@ SYM_FUNC_START_LOCAL(memcpy_erms) movq %rdi, %rax movq %rdx, %rcx rep movsb - ret + RET SYM_FUNC_END(memcpy_erms) SYM_FUNC_START_LOCAL(memcpy_orig) @@ -137,7 +137,7 @@ SYM_FUNC_START_LOCAL(memcpy_orig) movq %r9, 1*8(%rdi) movq %r10, -2*8(%rdi, %rdx) movq %r11, -1*8(%rdi, %rdx) - retq + RET .p2align 4 .Lless_16bytes: cmpl $8, %edx @@ -149,7 +149,7 @@ SYM_FUNC_START_LOCAL(memcpy_orig) movq -1*8(%rsi, %rdx), %r9 movq %r8, 0*8(%rdi) movq %r9, -1*8(%rdi, %rdx) - retq + RET .p2align 4 .Lless_8bytes: cmpl $4, %edx @@ -162,7 +162,7 @@ SYM_FUNC_START_LOCAL(memcpy_orig) movl -4(%rsi, %rdx), %r8d movl %ecx, (%rdi) movl %r8d, -4(%rdi, %rdx) - retq + RET .p2align 4 .Lless_3bytes: subl $1, %edx @@ -180,7 +180,7 @@ SYM_FUNC_START_LOCAL(memcpy_orig) movb %cl, (%rdi) .Lend: - retq + RET SYM_FUNC_END(memcpy_orig) .popsection diff --git a/tools/arch/x86/lib/memset_64.S b/tools/arch/x86/lib/memset_64.S index 9827ae267f96..d624f2bc42f1 100644 --- a/tools/arch/x86/lib/memset_64.S +++ b/tools/arch/x86/lib/memset_64.S @@ -40,7 +40,7 @@ SYM_FUNC_START(__memset) movl %edx,%ecx rep stosb movq %r9,%rax - ret + RET SYM_FUNC_END(__memset) SYM_FUNC_END_ALIAS(memset) EXPORT_SYMBOL(memset) @@ -63,7 +63,7 @@ SYM_FUNC_START_LOCAL(memset_erms) movq %rdx,%rcx rep stosb movq %r9,%rax - ret + RET SYM_FUNC_END(memset_erms) SYM_FUNC_START_LOCAL(memset_orig) @@ -125,7 +125,7 @@ SYM_FUNC_START_LOCAL(memset_orig) .Lende: movq %r10,%rax - ret + RET .Lbad_alignment: cmpq $7,%rdx From f1dcda0f79548c04f585108e2e165cb4fec951e8 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 3 May 2021 11:48:26 -0300 Subject: [PATCH 394/493] tools headers UAPI: Update tools's copy of drm.h header Picking the changes from: 43d5ac7d07023cd1 ("drm: document DRM_IOCTL_MODE_GETFB2") It is just a comment, so no changes and silences these perf build warnings: Warning: Kernel ABI header at 'tools/include/uapi/drm/drm.h' differs from latest version at 'include/uapi/drm/drm.h' diff -u tools/include/uapi/drm/drm.h include/uapi/drm/drm.h Cc: Simon Ser Signed-off-by: Arnaldo Carvalho de Melo --- tools/include/uapi/drm/drm.h | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/tools/include/uapi/drm/drm.h b/tools/include/uapi/drm/drm.h index 3b810b53ba8b..642808520d92 100644 --- a/tools/include/uapi/drm/drm.h +++ b/tools/include/uapi/drm/drm.h @@ -1096,6 +1096,24 @@ extern "C" { #define DRM_IOCTL_SYNCOBJ_TRANSFER DRM_IOWR(0xCC, struct drm_syncobj_transfer) #define DRM_IOCTL_SYNCOBJ_TIMELINE_SIGNAL DRM_IOWR(0xCD, struct drm_syncobj_timeline_array) +/** + * DRM_IOCTL_MODE_GETFB2 - Get framebuffer metadata. + * + * This queries metadata about a framebuffer. User-space fills + * &drm_mode_fb_cmd2.fb_id as the input, and the kernels fills the rest of the + * struct as the output. + * + * If the client is DRM master or has &CAP_SYS_ADMIN, &drm_mode_fb_cmd2.handles + * will be filled with GEM buffer handles. Planes are valid until one has a + * zero handle -- this can be used to compute the number of planes. + * + * Otherwise, &drm_mode_fb_cmd2.handles will be zeroed and planes are valid + * until one has a zero &drm_mode_fb_cmd2.pitches. + * + * If the framebuffer has a format modifier, &DRM_MODE_FB_MODIFIERS will be set + * in &drm_mode_fb_cmd2.flags and &drm_mode_fb_cmd2.modifier will contain the + * modifier. Otherwise, user-space must ignore &drm_mode_fb_cmd2.modifier. + */ #define DRM_IOCTL_MODE_GETFB2 DRM_IOWR(0xCE, struct drm_mode_fb_cmd2) /* From 486e5ed88827dabd295cd55f368d513ee8c30eb1 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 1 Jul 2021 13:39:15 -0300 Subject: [PATCH 395/493] tools headers cpufeatures: Sync with the kernel sources To pick the changes from: d341db8f48ea4331 ("x86/cpufeatures: Add AMD Collaborative Processor Performance Control feature flag") This only causes these perf files to be rebuilt: CC /tmp/build/perf/bench/mem-memcpy-x86-64-asm.o CC /tmp/build/perf/bench/mem-memset-x86-64-asm.o And addresses this perf build warning: Warning: Kernel ABI header at 'tools/arch/x86/include/asm/cpufeatures.h' differs from latest version at 'arch/x86/include/asm/cpufeatures.h' diff -u tools/arch/x86/include/asm/cpufeatures.h arch/x86/include/asm/cpufeatures.h Cc: Huang Rui Cc: Rafael J. Wysocki Signed-off-by: Arnaldo Carvalho de Melo --- tools/arch/x86/include/asm/cpufeatures.h | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/arch/x86/include/asm/cpufeatures.h b/tools/arch/x86/include/asm/cpufeatures.h index d5b5f2ab87a0..18de5f76f198 100644 --- a/tools/arch/x86/include/asm/cpufeatures.h +++ b/tools/arch/x86/include/asm/cpufeatures.h @@ -315,6 +315,7 @@ #define X86_FEATURE_AMD_SSBD (13*32+24) /* "" Speculative Store Bypass Disable */ #define X86_FEATURE_VIRT_SSBD (13*32+25) /* Virtualized Speculative Store Bypass Disable */ #define X86_FEATURE_AMD_SSB_NO (13*32+26) /* "" Speculative Store Bypass is fixed in hardware. */ +#define X86_FEATURE_CPPC (13*32+27) /* Collaborative Processor Performance Control */ /* Thermal and Power Management Leaf, CPUID level 0x00000006 (EAX), word 14 */ #define X86_FEATURE_DTHERM (14*32+ 0) /* Digital Thermal Sensor */ From 28a53d3160acd7e44a39a146da20e3e672fb0d96 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Tue, 4 Jan 2022 14:59:00 +0900 Subject: [PATCH 396/493] ata: ahci_mtk: add compile test support Add Kconfig dependendy on COMPILE_TEST to allow compile tests with configs that do not enable ARCH_MEDIATEK. Signed-off-by: Damien Le Moal Reviewed-by: Hannes Reinecke --- drivers/ata/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/ata/Kconfig b/drivers/ata/Kconfig index 47b7b69b88b1..305718031a3c 100644 --- a/drivers/ata/Kconfig +++ b/drivers/ata/Kconfig @@ -206,7 +206,7 @@ config AHCI_CEVA config AHCI_MTK tristate "MediaTek AHCI SATA support" - depends on ARCH_MEDIATEK + depends on ARCH_MEDIATEK || COMPILE_TEST select MFD_SYSCON select SATA_HOST help From 368c7edc15e5e505ab56d6caad60fd11ee2bc428 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Tue, 4 Jan 2022 15:01:01 +0900 Subject: [PATCH 397/493] ata: ahci_mvebu: add compile test support Add Kconfig dependendy on COMPILE_TEST to allow compile tests with configs that do not enable ARCH_MVEBU. Signed-off-by: Damien Le Moal Reviewed-by: Hannes Reinecke --- drivers/ata/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/ata/Kconfig b/drivers/ata/Kconfig index 305718031a3c..830e781e50c3 100644 --- a/drivers/ata/Kconfig +++ b/drivers/ata/Kconfig @@ -217,7 +217,7 @@ config AHCI_MTK config AHCI_MVEBU tristate "Marvell EBU AHCI SATA support" - depends on ARCH_MVEBU + depends on ARCH_MVEBU || COMPILE_TEST select SATA_HOST help This option enables support for the Marvebu EBU SoC's From c05b911afffa6a1842dd3bb9d54a8db178722e40 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Tue, 4 Jan 2022 15:06:12 +0900 Subject: [PATCH 398/493] ata: ahci_sunxi: add compile test support Add Kconfig dependendy on COMPILE_TEST to allow compile tests with configs that do not enable ARCH_SUNXI. Signed-off-by: Damien Le Moal Reviewed-by: Hannes Reinecke --- drivers/ata/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/ata/Kconfig b/drivers/ata/Kconfig index 830e781e50c3..4da5e9410126 100644 --- a/drivers/ata/Kconfig +++ b/drivers/ata/Kconfig @@ -236,7 +236,7 @@ config AHCI_OCTEON config AHCI_SUNXI tristate "Allwinner sunxi AHCI SATA support" - depends on ARCH_SUNXI + depends on ARCH_SUNXI || COMPILE_TEST select SATA_HOST help This option enables support for the Allwinner sunxi SoC's From 3d98cbf7096ea50bbb4256c7781555bb69a07e52 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Tue, 4 Jan 2022 15:08:01 +0900 Subject: [PATCH 399/493] ata: ahci_tegra: add compile test support Add Kconfig dependendy on COMPILE_TEST to allow compile tests with configs that do not enable ARCH_TEGRA. Signed-off-by: Damien Le Moal Reviewed-by: Hannes Reinecke --- drivers/ata/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/ata/Kconfig b/drivers/ata/Kconfig index 4da5e9410126..49ce1e0d19d7 100644 --- a/drivers/ata/Kconfig +++ b/drivers/ata/Kconfig @@ -246,7 +246,7 @@ config AHCI_SUNXI config AHCI_TEGRA tristate "NVIDIA Tegra AHCI SATA support" - depends on ARCH_TEGRA + depends on ARCH_TEGRA || COMPILE_TEST select SATA_HOST help This option enables support for the NVIDIA Tegra SoC's From b7c9b00fb050c6b3fea6e32f1adbe0194296eb1f Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Tue, 4 Jan 2022 15:09:23 +0900 Subject: [PATCH 400/493] ata: ahci_xgene: add compile test support Add Kconfig dependendy on COMPILE_TEST to allow compile tests with configs that do not enable PHY_XGENE. Signed-off-by: Damien Le Moal Reviewed-by: Hannes Reinecke --- drivers/ata/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/ata/Kconfig b/drivers/ata/Kconfig index 49ce1e0d19d7..f96a29b1c8a3 100644 --- a/drivers/ata/Kconfig +++ b/drivers/ata/Kconfig @@ -256,7 +256,7 @@ config AHCI_TEGRA config AHCI_XGENE tristate "APM X-Gene 6.0Gbps AHCI SATA host controller support" - depends on PHY_XGENE + depends on PHY_XGENE || COMPILE_TEST select SATA_HOST help This option enables support for APM X-Gene SoC SATA host controller. From a33a348d0aca38107c435eef20c449cf13dd9447 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Tue, 4 Jan 2022 15:11:15 +0900 Subject: [PATCH 401/493] ata: ahci_seattle: add compile test support Add Kconfig dependendy on COMPILE_TEST to allow compile tests with configs that do not enable ARCH_SEATTLE. Signed-off-by: Damien Le Moal Reviewed-by: Hannes Reinecke --- drivers/ata/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/ata/Kconfig b/drivers/ata/Kconfig index f96a29b1c8a3..ff5bb8e0d601 100644 --- a/drivers/ata/Kconfig +++ b/drivers/ata/Kconfig @@ -294,7 +294,7 @@ config SATA_GEMINI config SATA_AHCI_SEATTLE tristate "AMD Seattle 6.0Gbps AHCI SATA host controller support" - depends on ARCH_SEATTLE + depends on ARCH_SEATTLE || COMPILE_TEST select SATA_HOST help This option enables support for AMD Seattle SATA host controller. From a3d11c275b647b5b56b907011b432e00f7ddb683 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Tue, 4 Jan 2022 15:17:42 +0900 Subject: [PATCH 402/493] ata: pata_bk3710: add compile test support Add Kconfig dependendy on COMPILE_TEST to allow compile tests with configs that do not enable ARCH_DAVINCI. Signed-off-by: Damien Le Moal Reviewed-by: Hannes Reinecke --- drivers/ata/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/ata/Kconfig b/drivers/ata/Kconfig index ff5bb8e0d601..8e211b21f48f 100644 --- a/drivers/ata/Kconfig +++ b/drivers/ata/Kconfig @@ -599,7 +599,7 @@ config PATA_ATP867X config PATA_BK3710 tristate "Palmchip BK3710 PATA support" - depends on ARCH_DAVINCI + depends on ARCH_DAVINCI || COMPILE_TEST select PATA_TIMINGS help This option enables support for the integrated IDE controller on From e5b48ee30aec1fe6dff05e36b22e886c665b4736 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Tue, 4 Jan 2022 16:14:46 +0900 Subject: [PATCH 403/493] ata: sata_fsl: fix scsi host initialization When compiling with W=1, the sata_fsl driver compilation throws the warning: drivers/ata/sata_fsl.c:1385:22: error: initialized field overwritten [-Werror=override-init] 1385 | .can_queue = SATA_FSL_QUEUE_DEPTH, This is due to the driver scsi host template initialization overwriting the can_queue field that is already set using the ATA_NCQ_SHT() initializer macro, resulting in the same field being initialized twice in the host template declaration. To remove this warning, introduce the ATA_SUBBASE_SHT_QD() and ATA_NCQ_SHT_QD() initialization macros to allow specifying a queue depth different from the default ATA_DEF_QUEUE using an additional argument to the macro. Signed-off-by: Damien Le Moal Reviewed-by: Hannes Reinecke --- drivers/ata/sata_fsl.c | 3 +-- include/linux/libata.h | 11 +++++++++++ 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/drivers/ata/sata_fsl.c b/drivers/ata/sata_fsl.c index 142e65d5efc7..101d4dd79f62 100644 --- a/drivers/ata/sata_fsl.c +++ b/drivers/ata/sata_fsl.c @@ -1380,8 +1380,7 @@ static void sata_fsl_host_stop(struct ata_host *host) * scsi mid-layer and libata interface structures */ static struct scsi_host_template sata_fsl_sht = { - ATA_NCQ_SHT("sata_fsl"), - .can_queue = SATA_FSL_QUEUE_DEPTH, + ATA_NCQ_SHT_QD("sata_fsl", SATA_FSL_QUEUE_DEPTH), .sg_tablesize = SATA_FSL_MAX_PRD_USABLE, .dma_boundary = ATA_DMA_BOUNDARY, }; diff --git a/include/linux/libata.h b/include/linux/libata.h index c258f69106f4..2e5e7c40c991 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -1385,6 +1385,12 @@ extern const struct attribute_group *ata_common_sdev_groups[]; .tag_alloc_policy = BLK_TAG_ALLOC_RR, \ .slave_configure = ata_scsi_slave_config +#define ATA_SUBBASE_SHT_QD(drv_name, drv_qd) \ + __ATA_BASE_SHT(drv_name), \ + .can_queue = drv_qd, \ + .tag_alloc_policy = BLK_TAG_ALLOC_RR, \ + .slave_configure = ata_scsi_slave_config + #define ATA_BASE_SHT(drv_name) \ ATA_SUBBASE_SHT(drv_name), \ .sdev_groups = ata_common_sdev_groups @@ -1396,6 +1402,11 @@ extern const struct attribute_group *ata_ncq_sdev_groups[]; ATA_SUBBASE_SHT(drv_name), \ .sdev_groups = ata_ncq_sdev_groups, \ .change_queue_depth = ata_scsi_change_queue_depth + +#define ATA_NCQ_SHT_QD(drv_name, drv_qd) \ + ATA_SUBBASE_SHT_QD(drv_name, drv_qd), \ + .sdev_groups = ata_ncq_sdev_groups, \ + .change_queue_depth = ata_scsi_change_queue_depth #endif /* From f8bc938ee6c60ec862fb5311789b6e277555f0b0 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Tue, 4 Jan 2022 19:04:49 +0900 Subject: [PATCH 404/493] ata: sata_fsl: fix cmdhdr_tbl_entry and prde struct definitions The fields of the cmdhdr_tbl_entry structure all store __le32 values, and so are the dba and ddc_and_ext fields of the prde structure. Define these fields using the __le32 type to avoid sparse warnings about incorrect type in assignment. The debug message in sata_fsl_setup_cmd_hdr_entry() is changed to display the correct values of the cmdhdr_tbl_entry fields on big endian systems. Signed-off-by: Damien Le Moal Reviewed-by: Hannes Reinecke --- drivers/ata/sata_fsl.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/drivers/ata/sata_fsl.c b/drivers/ata/sata_fsl.c index 101d4dd79f62..da0152116d9f 100644 --- a/drivers/ata/sata_fsl.c +++ b/drivers/ata/sata_fsl.c @@ -221,10 +221,10 @@ enum { * 4 Dwords per command slot, command header size == 64 Dwords. */ struct cmdhdr_tbl_entry { - u32 cda; - u32 prde_fis_len; - u32 ttl; - u32 desc_info; + __le32 cda; + __le32 prde_fis_len; + __le32 ttl; + __le32 desc_info; }; /* @@ -259,9 +259,9 @@ struct command_desc { */ struct prde { - u32 dba; + __le32 dba; u8 fill[2 * 4]; - u32 ddc_and_ext; + __le32 ddc_and_ext; }; /* @@ -426,10 +426,10 @@ static void sata_fsl_setup_cmd_hdr_entry(struct ata_port *ap, pp->cmdslot[tag].desc_info = cpu_to_le32(desc_info | (tag & 0x1F)); ata_port_dbg(ap, "cda=0x%x, prde_fis_len=0x%x, ttl=0x%x, di=0x%x\n", - pp->cmdslot[tag].cda, - pp->cmdslot[tag].prde_fis_len, - pp->cmdslot[tag].ttl, pp->cmdslot[tag].desc_info); - + le32_to_cpu(pp->cmdslot[tag].cda), + le32_to_cpu(pp->cmdslot[tag].prde_fis_len), + le32_to_cpu(pp->cmdslot[tag].ttl), + le32_to_cpu(pp->cmdslot[tag].desc_info)); } static unsigned int sata_fsl_fill_sg(struct ata_queued_cmd *qc, void *cmd_desc, From 2bce69072a0db6c3444650023c6f35bfd7a23d29 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Tue, 4 Jan 2022 17:49:54 +0900 Subject: [PATCH 405/493] ata: ahci_xgene: use correct type for port mmio address Sparse complains about an incorrect type for port_mmio pointer variables: drivers/ata/ahci_xgene.c:196:41: warning: incorrect type in initializer (different address spaces) drivers/ata/ahci_xgene.c:196:41: expected void *port_mmio drivers/ata/ahci_xgene.c:196:41: got void [noderef] __iomem * Fix this by declaring port_mmio as "void __iomem *" instead of "void *". Signed-off-by: Damien Le Moal Reviewed-by: Hannes Reinecke --- drivers/ata/ahci_xgene.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/ata/ahci_xgene.c b/drivers/ata/ahci_xgene.c index 4d8a186ec12a..68ec7e9430b2 100644 --- a/drivers/ata/ahci_xgene.c +++ b/drivers/ata/ahci_xgene.c @@ -193,7 +193,7 @@ static unsigned int xgene_ahci_qc_issue(struct ata_queued_cmd *qc) struct xgene_ahci_context *ctx = hpriv->plat_data; int rc = 0; u32 port_fbs; - void *port_mmio = ahci_port_base(ap); + void __iomem *port_mmio = ahci_port_base(ap); /* * Write the pmp value to PxFBS.DEV @@ -454,7 +454,7 @@ static int xgene_ahci_pmp_softreset(struct ata_link *link, unsigned int *class, int pmp = sata_srst_pmp(link); struct ata_port *ap = link->ap; u32 rc; - void *port_mmio = ahci_port_base(ap); + void __iomem *port_mmio = ahci_port_base(ap); u32 port_fbs; /* @@ -499,7 +499,7 @@ static int xgene_ahci_softreset(struct ata_link *link, unsigned int *class, struct ata_port *ap = link->ap; struct ahci_host_priv *hpriv = ap->host->private_data; struct xgene_ahci_context *ctx = hpriv->plat_data; - void *port_mmio = ahci_port_base(ap); + void __iomem *port_mmio = ahci_port_base(ap); u32 port_fbs; u32 port_fbs_save; u32 retry = 1; From 0561e514c944da874ccdfbe2922f71b4c333c7e1 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Tue, 4 Jan 2022 17:54:18 +0900 Subject: [PATCH 406/493] ata: fix read_id() ata port operation interface Drivers that need to tweak a device IDENTIFY data implement the read_id() port operation. The IDENTIFY data buffer is passed as an argument to the read_id() operation for drivers to use. However, when this operation is called, the IDENTIFY data is not yet converted to CPU endian and contains le16 words. Change the interface of the read_id operation to pass a __le16 * pointer to the IDENTIFY data buffer to clarify the buffer endianness. Fix the pata_netcell, pata_it821x, ahci_xgene, ahci_ceva and ahci_brcm drivers implementation of this operation and modify the code to corretly deal with identify data words manipulation to avoid sparse warnings such as: drivers/ata/ahci_xgene.c:262:33: warning: invalid assignment: &= drivers/ata/ahci_xgene.c:262:33: left side has type unsigned short drivers/ata/ahci_xgene.c:262:33: right side has type restricted __le16 Signed-off-by: Damien Le Moal Reviewed-by: Hannes Reinecke --- drivers/ata/ahci_brcm.c | 2 +- drivers/ata/ahci_ceva.c | 5 ++--- drivers/ata/ahci_xgene.c | 2 +- drivers/ata/libata-core.c | 6 +++--- drivers/ata/pata_it821x.c | 23 +++++++++++------------ drivers/ata/pata_netcell.c | 5 +++-- include/linux/libata.h | 5 +++-- 7 files changed, 24 insertions(+), 24 deletions(-) diff --git a/drivers/ata/ahci_brcm.c b/drivers/ata/ahci_brcm.c index 6e9c5ade4c2e..ba695338927a 100644 --- a/drivers/ata/ahci_brcm.c +++ b/drivers/ata/ahci_brcm.c @@ -246,7 +246,7 @@ static void brcm_sata_init(struct brcm_ahci_priv *priv) } static unsigned int brcm_ahci_read_id(struct ata_device *dev, - struct ata_taskfile *tf, u16 *id) + struct ata_taskfile *tf, __le16 *id) { struct ata_port *ap = dev->link->ap; struct ata_host *host = ap->host; diff --git a/drivers/ata/ahci_ceva.c b/drivers/ata/ahci_ceva.c index e9c7c07fd84c..acf59f51b356 100644 --- a/drivers/ata/ahci_ceva.c +++ b/drivers/ata/ahci_ceva.c @@ -92,9 +92,8 @@ struct ceva_ahci_priv { }; static unsigned int ceva_ahci_read_id(struct ata_device *dev, - struct ata_taskfile *tf, u16 *id) + struct ata_taskfile *tf, __le16 *id) { - __le16 *__id = (__le16 *)id; u32 err_mask; err_mask = ata_do_dev_read_id(dev, tf, id); @@ -104,7 +103,7 @@ static unsigned int ceva_ahci_read_id(struct ata_device *dev, * Since CEVA controller does not support device sleep feature, we * need to clear DEVSLP (bit 8) in word78 of the IDENTIFY DEVICE data. */ - __id[ATA_ID_FEATURE_SUPP] &= cpu_to_le16(~(1 << 8)); + id[ATA_ID_FEATURE_SUPP] &= cpu_to_le16(~(1 << 8)); return 0; } diff --git a/drivers/ata/ahci_xgene.c b/drivers/ata/ahci_xgene.c index 68ec7e9430b2..8e206379d699 100644 --- a/drivers/ata/ahci_xgene.c +++ b/drivers/ata/ahci_xgene.c @@ -237,7 +237,7 @@ static bool xgene_ahci_is_memram_inited(struct xgene_ahci_context *ctx) * does not support DEVSLP. */ static unsigned int xgene_ahci_read_id(struct ata_device *dev, - struct ata_taskfile *tf, u16 *id) + struct ata_taskfile *tf, __le16 *id) { u32 err_mask; diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c index 9c2947905d1e..67f88027680a 100644 --- a/drivers/ata/libata-core.c +++ b/drivers/ata/libata-core.c @@ -1722,7 +1722,7 @@ static u32 ata_pio_mask_no_iordy(const struct ata_device *adev) * this function is wrapped or replaced by the driver */ unsigned int ata_do_dev_read_id(struct ata_device *dev, - struct ata_taskfile *tf, u16 *id) + struct ata_taskfile *tf, __le16 *id) { return ata_exec_internal(dev, tf, NULL, DMA_FROM_DEVICE, id, sizeof(id[0]) * ATA_ID_WORDS, 0); @@ -1795,9 +1795,9 @@ retry: tf.flags |= ATA_TFLAG_POLLING; if (ap->ops->read_id) - err_mask = ap->ops->read_id(dev, &tf, id); + err_mask = ap->ops->read_id(dev, &tf, (__le16 *)id); else - err_mask = ata_do_dev_read_id(dev, &tf, id); + err_mask = ata_do_dev_read_id(dev, &tf, (__le16 *)id); if (err_mask) { if (err_mask & AC_ERR_NODEV_HINT) { diff --git a/drivers/ata/pata_it821x.c b/drivers/ata/pata_it821x.c index b77ef0046dbe..8a5b4e0079ab 100644 --- a/drivers/ata/pata_it821x.c +++ b/drivers/ata/pata_it821x.c @@ -537,7 +537,7 @@ static void it821x_dev_config(struct ata_device *adev) */ static unsigned int it821x_read_id(struct ata_device *adev, - struct ata_taskfile *tf, u16 *id) + struct ata_taskfile *tf, __le16 *id) { unsigned int err_mask; unsigned char model_num[ATA_ID_PROD_LEN + 1]; @@ -545,21 +545,20 @@ static unsigned int it821x_read_id(struct ata_device *adev, err_mask = ata_do_dev_read_id(adev, tf, id); if (err_mask) return err_mask; - ata_id_c_string(id, model_num, ATA_ID_PROD, sizeof(model_num)); + ata_id_c_string((u16 *)id, model_num, ATA_ID_PROD, sizeof(model_num)); - id[83] &= ~(1 << 12); /* Cache flush is firmware handled */ - id[83] &= ~(1 << 13); /* Ditto for LBA48 flushes */ - id[84] &= ~(1 << 6); /* No FUA */ - id[85] &= ~(1 << 10); /* No HPA */ - id[76] = 0; /* No NCQ/AN etc */ + id[83] &= cpu_to_le16(~(1 << 12)); /* Cache flush is firmware handled */ + id[84] &= cpu_to_le16(~(1 << 6)); /* No FUA */ + id[85] &= cpu_to_le16(~(1 << 10)); /* No HPA */ + id[76] = 0; /* No NCQ/AN etc */ if (strstr(model_num, "Integrated Technology Express")) { /* Set feature bits the firmware neglects */ - id[49] |= 0x0300; /* LBA, DMA */ - id[83] &= 0x7FFF; - id[83] |= 0x4400; /* Word 83 is valid and LBA48 */ - id[86] |= 0x0400; /* LBA48 on */ - id[ATA_ID_MAJOR_VER] |= 0x1F; + id[49] |= cpu_to_le16(0x0300); /* LBA, DMA */ + id[83] &= cpu_to_le16(0x7FFF); + id[83] |= cpu_to_le16(0x4400); /* Word 83 is valid and LBA48 */ + id[86] |= cpu_to_le16(0x0400); /* LBA48 on */ + id[ATA_ID_MAJOR_VER] |= cpu_to_le16(0x1F); /* Clear the serial number because it's different each boot which breaks validation on resume */ memset(&id[ATA_ID_SERNO], 0x20, ATA_ID_SERNO_LEN); diff --git a/drivers/ata/pata_netcell.c b/drivers/ata/pata_netcell.c index a7ecc1a204b5..06929e77c491 100644 --- a/drivers/ata/pata_netcell.c +++ b/drivers/ata/pata_netcell.c @@ -21,12 +21,13 @@ /* No PIO or DMA methods needed for this device */ static unsigned int netcell_read_id(struct ata_device *adev, - struct ata_taskfile *tf, u16 *id) + struct ata_taskfile *tf, __le16 *id) { unsigned int err_mask = ata_do_dev_read_id(adev, tf, id); + /* Firmware forgets to mark words 85-87 valid */ if (err_mask == 0) - id[ATA_ID_CSF_DEFAULT] |= 0x4000; + id[ATA_ID_CSF_DEFAULT] |= cpu_to_le16(0x4000); return err_mask; } diff --git a/include/linux/libata.h b/include/linux/libata.h index 2e5e7c40c991..bf706cd45674 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -884,7 +884,8 @@ struct ata_port_operations { void (*set_piomode)(struct ata_port *ap, struct ata_device *dev); void (*set_dmamode)(struct ata_port *ap, struct ata_device *dev); int (*set_mode)(struct ata_link *link, struct ata_device **r_failed_dev); - unsigned int (*read_id)(struct ata_device *dev, struct ata_taskfile *tf, u16 *id); + unsigned int (*read_id)(struct ata_device *dev, struct ata_taskfile *tf, + __le16 *id); void (*dev_config)(struct ata_device *dev); @@ -1119,7 +1120,7 @@ extern void ata_id_string(const u16 *id, unsigned char *s, extern void ata_id_c_string(const u16 *id, unsigned char *s, unsigned int ofs, unsigned int len); extern unsigned int ata_do_dev_read_id(struct ata_device *dev, - struct ata_taskfile *tf, u16 *id); + struct ata_taskfile *tf, __le16 *id); extern void ata_qc_complete(struct ata_queued_cmd *qc); extern u64 ata_qc_get_active(struct ata_port *ap); extern void ata_scsi_simulate(struct ata_device *dev, struct scsi_cmnd *cmd); From 9c2fd3fb43bdf2641093fe287d1944ec3c88eeda Mon Sep 17 00:00:00 2001 From: Minghao Chi Date: Tue, 4 Jan 2022 11:25:45 +0000 Subject: [PATCH 407/493] ata: pata_octeon_cf: remove redundant val variable Return value from DIV_ROUND_UP() directly instead of taking this in another redundant variable. Reported-by: Zeal Robot Signed-off-by: Minghao Chi Signed-off-by: CGEL ZTE Signed-off-by: Damien Le Moal --- drivers/ata/pata_octeon_cf.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/drivers/ata/pata_octeon_cf.c b/drivers/ata/pata_octeon_cf.c index df62e22b49a6..0912846bc1b0 100644 --- a/drivers/ata/pata_octeon_cf.c +++ b/drivers/ata/pata_octeon_cf.c @@ -73,16 +73,12 @@ MODULE_PARM_DESC(enable_dma, */ static unsigned int ns_to_tim_reg(unsigned int tim_mult, unsigned int nsecs) { - unsigned int val; - /* * Compute # of eclock periods to get desired duration in * nanoseconds. */ - val = DIV_ROUND_UP(nsecs * (octeon_get_io_clock_rate() / 1000000), + return DIV_ROUND_UP(nsecs * (octeon_get_io_clock_rate() / 1000000), 1000 * tim_mult); - - return val; } static void octeon_cf_set_boot_reg_cfg(int cs, unsigned int multiplier) From dc5d7b3cfd7833d41c2e2fad5fd5af5c95d05d04 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Tue, 4 Jan 2022 15:19:30 +0900 Subject: [PATCH 408/493] ata: pata_cs5535: add compile test support Add Kconfig dependendy on X86_64 && COMPILE_TEST to allow compile tests with configs that do not have X86_32 enabled on X86_64 hosts. Signed-off-by: Damien Le Moal Reviewed-by: Hannes Reinecke --- drivers/ata/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/ata/Kconfig b/drivers/ata/Kconfig index 8e211b21f48f..2c381c59357b 100644 --- a/drivers/ata/Kconfig +++ b/drivers/ata/Kconfig @@ -637,7 +637,7 @@ config PATA_CS5530 config PATA_CS5535 tristate "CS5535 PATA support (Experimental)" - depends on PCI && X86_32 + depends on PCI && (X86_32 || (X86_64 && COMPILE_TEST)) help This option enables support for the NatSemi/AMD CS5535 companion chip used with the Geode processor family. From 2aa566716f43776aee1cb46b3bb40af67b080d06 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Tue, 4 Jan 2022 15:24:52 +0900 Subject: [PATCH 409/493] ata: pata_ftide010: add compile test support Add Kconfig dependendy on COMPILE_TEST to allow compile tests with configs that do not enable ARM. Signed-off-by: Damien Le Moal Reviewed-by: Hannes Reinecke --- drivers/ata/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/ata/Kconfig b/drivers/ata/Kconfig index 2c381c59357b..f0e59ab5c11f 100644 --- a/drivers/ata/Kconfig +++ b/drivers/ata/Kconfig @@ -685,7 +685,7 @@ config PATA_EP93XX config PATA_FTIDE010 tristate "Faraday Technology FTIDE010 PATA support" depends on OF - depends on ARM + depends on ARM || COMPILE_TEST depends on SATA_GEMINI help This option enables support for the Faraday FTIDE010 From 7dc3c053bddf735b305bacfc620aa5cf6874ffe6 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Tue, 4 Jan 2022 15:31:34 +0900 Subject: [PATCH 410/493] ata: pata_imx: add compile test support Add Kconfig dependendy on COMPILE_TEST to allow compile tests with configs that do not enable ARCH_MXC. Signed-off-by: Damien Le Moal Reviewed-by: Hannes Reinecke --- drivers/ata/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/ata/Kconfig b/drivers/ata/Kconfig index f0e59ab5c11f..a07cbc46ee60 100644 --- a/drivers/ata/Kconfig +++ b/drivers/ata/Kconfig @@ -748,7 +748,7 @@ config PATA_ICSIDE config PATA_IMX tristate "PATA support for Freescale iMX" - depends on ARCH_MXC + depends on ARCH_MXC || COMPILE_TEST select PATA_TIMINGS help This option enables support for the PATA host available on Freescale From 7767c73a3565ae975e7f1de7900815be4267cc3c Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Tue, 4 Jan 2022 15:35:57 +0900 Subject: [PATCH 411/493] ata: pata_pxa: add compile test support Add Kconfig dependendy on COMPILE_TEST to allow compile tests with configs that do not enable ARCH_PXA. Signed-off-by: Damien Le Moal Reviewed-by: Hannes Reinecke --- drivers/ata/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/ata/Kconfig b/drivers/ata/Kconfig index a07cbc46ee60..be812fe727fc 100644 --- a/drivers/ata/Kconfig +++ b/drivers/ata/Kconfig @@ -969,7 +969,7 @@ config PATA_VIA config PATA_PXA tristate "PXA DMA-capable PATA support" - depends on ARCH_PXA + depends on ARCH_PXA || COMPILE_TEST help This option enables support for harddrive attached to PXA CPU's bus. From b6a64a860e1319dfbabc55b351c8b6583bd67413 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Tue, 4 Jan 2022 15:48:17 +0900 Subject: [PATCH 412/493] ata: pata_samsung_cf: add compile test support Add Kconfig dependendy on COMPILE_TEST to allow compile tests with configs that do not enable SAMSUNG_DEV_IDE. Signed-off-by: Damien Le Moal Reviewed-by: Hannes Reinecke --- drivers/ata/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/ata/Kconfig b/drivers/ata/Kconfig index be812fe727fc..cb54631fd950 100644 --- a/drivers/ata/Kconfig +++ b/drivers/ata/Kconfig @@ -1145,7 +1145,7 @@ config PATA_RZ1000 config PATA_SAMSUNG_CF tristate "Samsung SoC PATA support" - depends on SAMSUNG_DEV_IDE + depends on SAMSUNG_DEV_IDE || COMPILE_TEST select PATA_TIMINGS help This option enables basic support for Samsung's S3C/S5P board From db6a3f47cecc3da00d13fc68738aaa96e31f7c04 Mon Sep 17 00:00:00 2001 From: Lad Prabhakar Date: Wed, 5 Jan 2022 18:17:21 +0000 Subject: [PATCH 413/493] ata: pata_of_platform: Use platform_get_irq_optional() to get the interrupt platform_get_resource(pdev, IORESOURCE_IRQ, ..) relies on static allocation of IRQ resources in DT core code, this causes an issue when using hierarchical interrupt domains using "interrupts" property in the node as this bypasses the hierarchical setup and messes up the irq chaining. In preparation for removal of static setup of IRQ resource from DT core code use platform_get_irq_optional(). Note the code does not set the IRQ flags as this is handled automatically for DT. Signed-off-by: Lad Prabhakar Reviewed-by: Andy Shevchenko Signed-off-by: Damien Le Moal --- drivers/ata/pata_of_platform.c | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/drivers/ata/pata_of_platform.c b/drivers/ata/pata_of_platform.c index 35aa158fc976..c3a40b717dcd 100644 --- a/drivers/ata/pata_of_platform.c +++ b/drivers/ata/pata_of_platform.c @@ -25,11 +25,12 @@ static int pata_of_platform_probe(struct platform_device *ofdev) struct device_node *dn = ofdev->dev.of_node; struct resource io_res; struct resource ctl_res; - struct resource *irq_res; + struct resource irq_res; unsigned int reg_shift = 0; int pio_mode = 0; int pio_mask; bool use16bit; + int irq; ret = of_address_to_resource(dn, 0, &io_res); if (ret) { @@ -45,7 +46,15 @@ static int pata_of_platform_probe(struct platform_device *ofdev) return -EINVAL; } - irq_res = platform_get_resource(ofdev, IORESOURCE_IRQ, 0); + memset(&irq_res, 0, sizeof(irq_res)); + + irq = platform_get_irq_optional(ofdev, 0); + if (irq < 0 && irq != -ENXIO) + return irq; + if (irq > 0) { + irq_res.start = irq; + irq_res.end = irq; + } of_property_read_u32(dn, "reg-shift", ®_shift); @@ -63,7 +72,7 @@ static int pata_of_platform_probe(struct platform_device *ofdev) pio_mask = 1 << pio_mode; pio_mask |= (1 << pio_mode) - 1; - return __pata_platform_probe(&ofdev->dev, &io_res, &ctl_res, irq_res, + return __pata_platform_probe(&ofdev->dev, &io_res, &ctl_res, irq > 0 ? &irq_res : NULL, reg_shift, pio_mask, &pata_platform_sht, use16bit); } From 84eac327af543f03172085d5ef9f98ea25a51191 Mon Sep 17 00:00:00 2001 From: Wenchao Hao Date: Wed, 5 Jan 2022 19:13:54 -0500 Subject: [PATCH 414/493] ata: libata-scsi: simplify __ata_scsi_queuecmd() This patch cleans up the code of __ata_scsi_queuecmd(). Since each branch of the "if" condition check that scmd->cmd_len is not zero, move this check out of the "if" to simplify the conditions being checked in the "else" branch. While at it, avoid the if-else-if-else structure using if-else if structure and remove the redundant rc local variable. This patch does not change the function logic. Signed-off-by: Wenchao Hao Signed-off-by: Damien Le Moal --- drivers/ata/libata-scsi.c | 45 ++++++++++++++++++--------------------- 1 file changed, 21 insertions(+), 24 deletions(-) diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c index a16ef0030667..ed8be585a98f 100644 --- a/drivers/ata/libata-scsi.c +++ b/drivers/ata/libata-scsi.c @@ -3958,42 +3958,39 @@ int __ata_scsi_queuecmd(struct scsi_cmnd *scmd, struct ata_device *dev) { u8 scsi_op = scmd->cmnd[0]; ata_xlat_func_t xlat_func; - int rc = 0; + + if (unlikely(!scmd->cmd_len)) + goto bad_cdb_len; if (dev->class == ATA_DEV_ATA || dev->class == ATA_DEV_ZAC) { - if (unlikely(!scmd->cmd_len || scmd->cmd_len > dev->cdb_len)) + if (unlikely(scmd->cmd_len > dev->cdb_len)) goto bad_cdb_len; xlat_func = ata_get_xlat_func(dev, scsi_op); - } else { - if (unlikely(!scmd->cmd_len)) + } else if (likely((scsi_op != ATA_16) || !atapi_passthru16)) { + /* relay SCSI command to ATAPI device */ + int len = COMMAND_SIZE(scsi_op); + + if (unlikely(len > scmd->cmd_len || + len > dev->cdb_len || + scmd->cmd_len > ATAPI_CDB_LEN)) goto bad_cdb_len; - xlat_func = NULL; - if (likely((scsi_op != ATA_16) || !atapi_passthru16)) { - /* relay SCSI command to ATAPI device */ - int len = COMMAND_SIZE(scsi_op); - if (unlikely(len > scmd->cmd_len || - len > dev->cdb_len || - scmd->cmd_len > ATAPI_CDB_LEN)) - goto bad_cdb_len; + xlat_func = atapi_xlat; + } else { + /* ATA_16 passthru, treat as an ATA command */ + if (unlikely(scmd->cmd_len > 16)) + goto bad_cdb_len; - xlat_func = atapi_xlat; - } else { - /* ATA_16 passthru, treat as an ATA command */ - if (unlikely(scmd->cmd_len > 16)) - goto bad_cdb_len; - - xlat_func = ata_get_xlat_func(dev, scsi_op); - } + xlat_func = ata_get_xlat_func(dev, scsi_op); } if (xlat_func) - rc = ata_scsi_translate(dev, scmd, xlat_func); - else - ata_scsi_simulate(dev, scmd); + return ata_scsi_translate(dev, scmd, xlat_func); - return rc; + ata_scsi_simulate(dev, scmd); + + return 0; bad_cdb_len: scmd->result = DID_ERROR << 16; From b9ba367c513dbc165dd6c01266a59db4be2a3564 Mon Sep 17 00:00:00 2001 From: Paul Menzel Date: Wed, 5 Jan 2022 16:36:16 +0100 Subject: [PATCH 415/493] ata: libata: Rename link flag ATA_LFLAG_NO_DB_DELAY Rename the link flag ATA_LFLAG_NO_DB_DELAY to ATA_LFLAG_NO_DEBOUNCE_DELAY. The new name is longer, but clearer. Signed-off-by: Paul Menzel Signed-off-by: Damien Le Moal --- drivers/ata/ahci_brcm.c | 2 +- drivers/ata/libata-sata.c | 2 +- include/linux/libata.h | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/ata/ahci_brcm.c b/drivers/ata/ahci_brcm.c index ba695338927a..64dd8aa397d5 100644 --- a/drivers/ata/ahci_brcm.c +++ b/drivers/ata/ahci_brcm.c @@ -333,7 +333,7 @@ static struct ata_port_operations ahci_brcm_platform_ops = { static const struct ata_port_info ahci_brcm_port_info = { .flags = AHCI_FLAG_COMMON | ATA_FLAG_NO_DIPM, - .link_flags = ATA_LFLAG_NO_DB_DELAY, + .link_flags = ATA_LFLAG_NO_DEBOUNCE_DELAY, .pio_mask = ATA_PIO4, .udma_mask = ATA_UDMA6, .port_ops = &ahci_brcm_platform_ops, diff --git a/drivers/ata/libata-sata.c b/drivers/ata/libata-sata.c index bfe9595d4f33..071158c0c44c 100644 --- a/drivers/ata/libata-sata.c +++ b/drivers/ata/libata-sata.c @@ -317,7 +317,7 @@ int sata_link_resume(struct ata_link *link, const unsigned long *params, * immediately after resuming. Delay 200ms before * debouncing. */ - if (!(link->flags & ATA_LFLAG_NO_DB_DELAY)) + if (!(link->flags & ATA_LFLAG_NO_DEBOUNCE_DELAY)) ata_msleep(link->ap, 200); /* is SControl restored correctly? */ diff --git a/include/linux/libata.h b/include/linux/libata.h index bf706cd45674..605756f645be 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -143,7 +143,7 @@ enum { ATA_LFLAG_NO_LPM = (1 << 8), /* disable LPM on this link */ ATA_LFLAG_RST_ONCE = (1 << 9), /* limit recovery to one reset */ ATA_LFLAG_CHANGED = (1 << 10), /* LPM state changed on this link */ - ATA_LFLAG_NO_DB_DELAY = (1 << 11), /* no debounce delay on link resume */ + ATA_LFLAG_NO_DEBOUNCE_DELAY = (1 << 11), /* no debounce delay on link resume */ /* struct ata_port flags */ ATA_FLAG_SLAVE_POSS = (1 << 0), /* host supports slave dev */ From a17ab7aba5df4135ef77d7f6d7105e1ea414936f Mon Sep 17 00:00:00 2001 From: Paul Menzel Date: Wed, 5 Jan 2022 16:36:18 +0100 Subject: [PATCH 416/493] ata: ahci: Add support for AMD A85 FCH (Hudson D4) Add support for the AMD A85 FCH (Hudson D4) AHCI adapter. Since this adapter does not require the default 200 ms debounce delay in sata_link_resume(), create a new board board_ahci_no_debounce_delay with the link flag ATA_LFLAG_NO_DEBOUNCE_DELAY, and, for now, configure the AMD A85 FCH (Hudson D4) to use it. On the ASUS F2A85-M PRO it reduces the Linux kernel boot time by the expected 200 ms from 787 ms to 585 ms. Signed-off-by: Paul Menzel Cc: Tejun Heo Signed-off-by: Damien Le Moal --- drivers/ata/ahci.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/ata/ahci.c b/drivers/ata/ahci.c index 63bafb610fbd..ab5811ef5a53 100644 --- a/drivers/ata/ahci.c +++ b/drivers/ata/ahci.c @@ -51,6 +51,7 @@ enum board_ids { board_ahci, board_ahci_ign_iferr, board_ahci_mobile, + board_ahci_no_debounce_delay, board_ahci_nomsi, board_ahci_noncq, board_ahci_nosntf, @@ -141,6 +142,13 @@ static const struct ata_port_info ahci_port_info[] = { .udma_mask = ATA_UDMA6, .port_ops = &ahci_ops, }, + [board_ahci_no_debounce_delay] = { + .flags = AHCI_FLAG_COMMON, + .link_flags = ATA_LFLAG_NO_DEBOUNCE_DELAY, + .pio_mask = ATA_PIO4, + .udma_mask = ATA_UDMA6, + .port_ops = &ahci_ops, + }, [board_ahci_nomsi] = { AHCI_HFLAGS (AHCI_HFLAG_NO_MSI), .flags = AHCI_FLAG_COMMON, @@ -437,6 +445,7 @@ static const struct pci_device_id ahci_pci_tbl[] = { board_ahci_al }, /* AMD */ { PCI_VDEVICE(AMD, 0x7800), board_ahci }, /* AMD Hudson-2 */ + { PCI_VDEVICE(AMD, 0x7801), board_ahci_no_debounce_delay }, /* AMD Hudson-2 (AHCI mode) */ { PCI_VDEVICE(AMD, 0x7900), board_ahci }, /* AMD CZ */ { PCI_VDEVICE(AMD, 0x7901), board_ahci_mobile }, /* AMD Green Sardine */ /* AMD is using RAID class only for ahci controllers */ From 237fe8885a3fdab169bf670790c9f40046af45d3 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Wed, 12 Jan 2022 23:47:41 +0000 Subject: [PATCH 417/493] ata: pata_ali: remove redundant return statement A return statement is unnecessarily complicated, currently value in variable mask is bitwise-masked and the variable is being updated and then returned. Just updating the mask is all that is required as the following statement is a return. Signed-off-by: Colin Ian King Signed-off-by: Damien Le Moal --- drivers/ata/pata_ali.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/ata/pata_ali.c b/drivers/ata/pata_ali.c index ab28a6707b94..1b90cda27246 100644 --- a/drivers/ata/pata_ali.c +++ b/drivers/ata/pata_ali.c @@ -123,7 +123,7 @@ static unsigned long ali_20_filter(struct ata_device *adev, unsigned long mask) mask &= ~(ATA_MASK_MWDMA | ATA_MASK_UDMA); ata_id_c_string(adev->id, model_num, ATA_ID_PROD, sizeof(model_num)); if (strstr(model_num, "WDC")) - return mask &= ~ATA_MASK_UDMA; + mask &= ~ATA_MASK_UDMA; return mask; } From e652ab64e5846d3fe5ac2c0405d55d79ecc52c36 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 7 Aug 2020 08:45:47 -0300 Subject: [PATCH 418/493] tools arch x86: Sync the msr-index.h copy with the kernel sources To pick up the changes in: 89aa94b4a218339b ("x86/msr: Add AMD CPPC MSR definitions") Addressing these tools/perf build warnings: diff -u tools/arch/x86/include/asm/msr-index.h arch/x86/include/asm/msr-index.h Warning: Kernel ABI header at 'tools/arch/x86/include/asm/msr-index.h' differs from latest version at 'arch/x86/include/asm/msr-index.h' That makes the beautification scripts to pick some new entries: $ tools/perf/trace/beauty/tracepoints/x86_msr.sh > before $ cp arch/x86/include/asm/msr-index.h tools/arch/x86/include/asm/msr-index.h $ tools/perf/trace/beauty/tracepoints/x86_msr.sh > after $ diff -u before after --- before 2022-01-13 10:59:51.743416890 -0300 +++ after 2022-01-13 11:00:00.776644178 -0300 @@ -303,6 +303,11 @@ [0xc0010299 - x86_AMD_V_KVM_MSRs_offset] = "AMD_RAPL_POWER_UNIT", [0xc001029a - x86_AMD_V_KVM_MSRs_offset] = "AMD_CORE_ENERGY_STATUS", [0xc001029b - x86_AMD_V_KVM_MSRs_offset] = "AMD_PKG_ENERGY_STATUS", + [0xc00102b0 - x86_AMD_V_KVM_MSRs_offset] = "AMD_CPPC_CAP1", + [0xc00102b1 - x86_AMD_V_KVM_MSRs_offset] = "AMD_CPPC_ENABLE", + [0xc00102b2 - x86_AMD_V_KVM_MSRs_offset] = "AMD_CPPC_CAP2", + [0xc00102b3 - x86_AMD_V_KVM_MSRs_offset] = "AMD_CPPC_REQ", + [0xc00102b4 - x86_AMD_V_KVM_MSRs_offset] = "AMD_CPPC_STATUS", [0xc00102f0 - x86_AMD_V_KVM_MSRs_offset] = "AMD_PPIN_CTL", [0xc00102f1 - x86_AMD_V_KVM_MSRs_offset] = "AMD_PPIN", }; $ And this gets rebuilt: CC /tmp/build/perf/trace/beauty/tracepoints/x86_msr.o INSTALL trace_plugins LD /tmp/build/perf/trace/beauty/tracepoints/perf-in.o LD /tmp/build/perf/trace/beauty/perf-in.o LD /tmp/build/perf/perf-in.o LINK /tmp/build/perf/perf Now one can trace systemwide asking to see backtraces to where those MSRs are being read/written with: # perf trace -e msr:*_msr/max-stack=32/ --filter="msr>=AMD_CPPC_CAP1 && msr<=AMD_CPPC_STATUS" ^C# If we use -v (verbose mode) we can see what it does behind the scenes: # perf trace -v -e msr:*_msr/max-stack=32/ --filter="msr>=AMD_CPPC_CAP1 && msr<=AMD_CPPC_STATUS" New filter for msr:read_msr: (msr>=0xc00102b0 && msr<=0xc00102b4) && (common_pid != 2612102 && common_pid != 3841) New filter for msr:write_msr: (msr>=0xc00102b0 && msr<=0xc00102b4) && (common_pid != 2612102 && common_pid != 3841) ^C# Example with a frequent msr: # perf trace -v -e msr:*_msr/max-stack=32/ --filter="msr==IA32_SPEC_CTRL" --max-events 2 Using CPUID AuthenticAMD-25-21-0 0x48 New filter for msr:read_msr: (msr==0x48) && (common_pid != 2612129 && common_pid != 3841) 0x48 New filter for msr:write_msr: (msr==0x48) && (common_pid != 2612129 && common_pid != 3841) mmap size 528384B Looking at the vmlinux_path (8 entries long) symsrc__init: build id mismatch for vmlinux. Using /proc/kcore for kernel data Using /proc/kallsyms for symbols 0.000 Timer/2525383 msr:write_msr(msr: IA32_SPEC_CTRL, val: 6) do_trace_write_msr ([kernel.kallsyms]) do_trace_write_msr ([kernel.kallsyms]) __switch_to_xtra ([kernel.kallsyms]) __switch_to ([kernel.kallsyms]) __schedule ([kernel.kallsyms]) schedule ([kernel.kallsyms]) futex_wait_queue_me ([kernel.kallsyms]) futex_wait ([kernel.kallsyms]) do_futex ([kernel.kallsyms]) __x64_sys_futex ([kernel.kallsyms]) do_syscall_64 ([kernel.kallsyms]) entry_SYSCALL_64_after_hwframe ([kernel.kallsyms]) __futex_abstimed_wait_common64 (/usr/lib64/libpthread-2.33.so) 0.030 :0/0 msr:write_msr(msr: IA32_SPEC_CTRL, val: 2) do_trace_write_msr ([kernel.kallsyms]) do_trace_write_msr ([kernel.kallsyms]) __switch_to_xtra ([kernel.kallsyms]) __switch_to ([kernel.kallsyms]) __schedule ([kernel.kallsyms]) schedule_idle ([kernel.kallsyms]) do_idle ([kernel.kallsyms]) cpu_startup_entry ([kernel.kallsyms]) secondary_startup_64_no_verify ([kernel.kallsyms]) # Acked-by: Huang Rui Acked-by: Rafael J. Wysocki Cc: Adrian Hunter Cc: Ian Rogers Cc: Jiri Olsa Cc: Namhyung Kim Link: https://lore.kernel.org/all/YeA2PAvHV+uHRhLj@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/arch/x86/include/asm/msr-index.h | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/tools/arch/x86/include/asm/msr-index.h b/tools/arch/x86/include/asm/msr-index.h index 01e2650b9585..3faf0f97edb1 100644 --- a/tools/arch/x86/include/asm/msr-index.h +++ b/tools/arch/x86/include/asm/msr-index.h @@ -486,6 +486,23 @@ #define MSR_AMD64_VIRT_SPEC_CTRL 0xc001011f +/* AMD Collaborative Processor Performance Control MSRs */ +#define MSR_AMD_CPPC_CAP1 0xc00102b0 +#define MSR_AMD_CPPC_ENABLE 0xc00102b1 +#define MSR_AMD_CPPC_CAP2 0xc00102b2 +#define MSR_AMD_CPPC_REQ 0xc00102b3 +#define MSR_AMD_CPPC_STATUS 0xc00102b4 + +#define AMD_CPPC_LOWEST_PERF(x) (((x) >> 0) & 0xff) +#define AMD_CPPC_LOWNONLIN_PERF(x) (((x) >> 8) & 0xff) +#define AMD_CPPC_NOMINAL_PERF(x) (((x) >> 16) & 0xff) +#define AMD_CPPC_HIGHEST_PERF(x) (((x) >> 24) & 0xff) + +#define AMD_CPPC_MAX_PERF(x) (((x) & 0xff) << 0) +#define AMD_CPPC_MIN_PERF(x) (((x) & 0xff) << 8) +#define AMD_CPPC_DES_PERF(x) (((x) & 0xff) << 16) +#define AMD_CPPC_ENERGY_PERF_PREF(x) (((x) & 0xff) << 24) + /* Fam 17h MSRs */ #define MSR_F17H_IRPERF 0xc00000e9 From 46f57d2410150985f81da7cbbb5fdcda01d02ac2 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Thu, 13 Jan 2022 22:48:22 -0800 Subject: [PATCH 419/493] perf arm: Fix off-by-one directory path Relative path include works in the regular build due to -I paths but may fail in other situations. Fixes: 83869019c74cc2d0 ("perf arch: Support register names from all archs") Reviewed-by: German Gomez Signed-off-by: Ian Rogers Cc: Alexander Shishkin Cc: Alexandre Truong Cc: Athira Jajeev Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Link: https://lore.kernel.org/r/20220114064822.1806019-1-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/arm64-frame-pointer-unwind-support.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/util/arm64-frame-pointer-unwind-support.c b/tools/perf/util/arm64-frame-pointer-unwind-support.c index 4f5ecf51ed38..2242a885fbd7 100644 --- a/tools/perf/util/arm64-frame-pointer-unwind-support.c +++ b/tools/perf/util/arm64-frame-pointer-unwind-support.c @@ -6,7 +6,7 @@ #include "unwind.h" #define perf_event_arm_regs perf_event_arm64_regs -#include "../arch/arm64/include/uapi/asm/perf_regs.h" +#include "../../arch/arm64/include/uapi/asm/perf_regs.h" #undef perf_event_arm_regs struct entries { From 99fc11bb5b6f19d2c3671d6cf38571cb3dedb472 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Thu, 13 Jan 2022 22:51:05 -0800 Subject: [PATCH 420/493] libperf tests: Update a use of the new cpumap API Fixes a build breakage. Fixes: 6d18804b963b78dc ("perf cpumap: Give CPUs their own type") Signed-off-by: Ian Rogers Cc: Alexander Shishkin Cc: colin ian king Cc: Ian Rogers Cc: Jiri Olsa Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Shunsuke Nakamura Link: http://lore.kernel.org/lkml/20220114065105.1806542-1-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/lib/perf/tests/test-evlist.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tools/lib/perf/tests/test-evlist.c b/tools/lib/perf/tests/test-evlist.c index e7afff12c35a..b3479dfa9a1c 100644 --- a/tools/lib/perf/tests/test-evlist.c +++ b/tools/lib/perf/tests/test-evlist.c @@ -334,7 +334,8 @@ static int test_mmap_cpus(void) }; cpu_set_t saved_mask; char path[PATH_MAX]; - int id, err, cpu, tmp; + int id, err, tmp; + struct perf_cpu cpu; union perf_event *event; int count = 0; @@ -377,7 +378,7 @@ static int test_mmap_cpus(void) cpu_set_t mask; CPU_ZERO(&mask); - CPU_SET(cpu, &mask); + CPU_SET(cpu.cpu, &mask); err = sched_setaffinity(0, sizeof(mask), &mask); __T("sched_setaffinity failed", err == 0); From e000ea0beffb5497425054b151369fe37a792ece Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jos=C3=A9=20Exp=C3=B3sito?= Date: Wed, 8 Dec 2021 18:11:13 +0100 Subject: [PATCH 421/493] perf metricgroup: Fix use after free in metric__new() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We shouldn't free() something that will be used in the next line, fix it. Fixes: b85a4d61d3022608 ("perf metric: Allow modifiers on metrics") Addresses-Coverity-ID: 1494000 Signed-off-by: José Expósito Cc: Alexander Shishkin Cc: Andi Kleen Cc: Ian Rogers Cc: Jiri Olsa Cc: John Garry Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lore.kernel.org/lkml/20211208171113.22089-1-jose.exposito89@gmail.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/metricgroup.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/util/metricgroup.c b/tools/perf/util/metricgroup.c index 51c99cb08abf..8826c555f780 100644 --- a/tools/perf/util/metricgroup.c +++ b/tools/perf/util/metricgroup.c @@ -209,8 +209,8 @@ static struct metric *metric__new(const struct pmu_event *pe, m->metric_name = pe->metric_name; m->modifier = modifier ? strdup(modifier) : NULL; if (modifier && !m->modifier) { - free(m); expr__ctx_free(m->pctx); + free(m); return NULL; } m->metric_expr = pe->metric_expr; From a6e62743621ea29bea461774c0bcc68e5de59068 Mon Sep 17 00:00:00 2001 From: Thomas Richter Date: Wed, 24 Nov 2021 10:03:43 +0100 Subject: [PATCH 422/493] perf cputopo: Fix CPU topology reading on s/390 Commit fdf1e29b6118c18f ("perf expr: Add metric literals for topology.") fails on s390: # ./perf test -Fv 7 ... # FAILED tests/expr.c:173 #num_dies >= #num_packages ---- end ---- Simple expression parser: FAILED! # Investigating this issue leads to these functions: build_cpu_topology() +--> has_die_topology(void) { struct utsname uts; if (uname(&uts) < 0) return false; if (strncmp(uts.machine, "x86_64", 6)) return false; .... } which always returns false on s390. The caller build_cpu_topology() checks has_die_topology() return value. On false the the struct cpu_topology::die_cpu_list is not contructed and has zero entries. This leads to the failing comparison: #num_dies >= #num_packages. s390 of course has a positive number of packages. Fix this by adding s390 architecture to support CPU die list. Output after: # ./perf test -Fv 7 7: Simple expression parser : --- start --- division by zero syntax error ---- end ---- Simple expression parser: Ok # Fixes: fdf1e29b6118c18f ("perf expr: Add metric literals for topology.") Reviewed-by: Ian Rogers Signed-off-by: Thomas Richter Cc: Heiko Carstens Cc: Ian Rogers Cc: Sumanth Korikkar Cc: Sven Schnelle Cc: Vasily Gorbik Link: https://lore.kernel.org/r/20211124090343.9436-1-tmricht@linux.ibm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/cputopo.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tools/perf/util/cputopo.c b/tools/perf/util/cputopo.c index 84ca106a3246..e20b835a1194 100644 --- a/tools/perf/util/cputopo.c +++ b/tools/perf/util/cputopo.c @@ -165,7 +165,8 @@ static bool has_die_topology(void) if (uname(&uts) < 0) return false; - if (strncmp(uts.machine, "x86_64", 6)) + if (strncmp(uts.machine, "x86_64", 6) && + strncmp(uts.machine, "s390x", 5)) return false; scnprintf(filename, MAXPATHLEN, DIE_CPUS_FMT, From f346f32701ebacf6fe397f6f1d254256f73da321 Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Fri, 14 Jan 2022 19:11:21 +0100 Subject: [PATCH 423/493] MAINTAINERS: Add Helge as fbdev maintainer The fbdev layer is orphaned, but seems to need some care. So I'd like to step up as new maintainer. Signed-off-by: Helge Deller Acked-by: Geert Uytterhoeven --- MAINTAINERS | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index dd36acc87ce6..0917ab803ccd 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -7508,11 +7508,12 @@ W: http://floatingpoint.sourceforge.net/emulator/index.html F: arch/x86/math-emu/ FRAMEBUFFER LAYER -L: dri-devel@lists.freedesktop.org +M: Helge Deller L: linux-fbdev@vger.kernel.org -S: Orphan +L: dri-devel@lists.freedesktop.org +S: Maintained Q: http://patchwork.kernel.org/project/linux-fbdev/list/ -T: git git://anongit.freedesktop.org/drm/drm-misc +T: git git://git.kernel.org/pub/scm/linux/kernel/git/deller/linux-fbdev.git F: Documentation/fb/ F: drivers/video/ F: include/linux/fb.h From d9679d0013a66849f23057978f92e76b255c50aa Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Wed, 13 Oct 2021 06:55:44 -0400 Subject: [PATCH 424/493] virtio: wrap config->reset calls This will enable cleanups down the road. The idea is to disable cbs, then add "flush_queued_cbs" callback as a parameter, this way drivers can flush any work queued after callbacks have been disabled. Signed-off-by: Michael S. Tsirkin Link: https://lore.kernel.org/r/20211013105226.20225-1-mst@redhat.com Signed-off-by: Michael S. Tsirkin --- arch/um/drivers/virt-pci.c | 2 +- drivers/block/virtio_blk.c | 4 ++-- drivers/bluetooth/virtio_bt.c | 2 +- drivers/char/hw_random/virtio-rng.c | 2 +- drivers/char/virtio_console.c | 4 ++-- drivers/crypto/virtio/virtio_crypto_core.c | 8 ++++---- drivers/firmware/arm_scmi/virtio.c | 2 +- drivers/gpio/gpio-virtio.c | 2 +- drivers/gpu/drm/virtio/virtgpu_kms.c | 2 +- drivers/i2c/busses/i2c-virtio.c | 2 +- drivers/iommu/virtio-iommu.c | 2 +- drivers/net/caif/caif_virtio.c | 2 +- drivers/net/virtio_net.c | 4 ++-- drivers/net/wireless/mac80211_hwsim.c | 2 +- drivers/nvdimm/virtio_pmem.c | 2 +- drivers/rpmsg/virtio_rpmsg_bus.c | 2 +- drivers/scsi/virtio_scsi.c | 2 +- drivers/virtio/virtio.c | 6 ++++++ drivers/virtio/virtio_balloon.c | 2 +- drivers/virtio/virtio_input.c | 2 +- drivers/virtio/virtio_mem.c | 2 +- fs/fuse/virtio_fs.c | 4 ++-- include/linux/virtio.h | 1 + net/9p/trans_virtio.c | 2 +- net/vmw_vsock/virtio_transport.c | 4 ++-- sound/virtio/virtio_card.c | 4 ++-- 26 files changed, 40 insertions(+), 33 deletions(-) diff --git a/arch/um/drivers/virt-pci.c b/arch/um/drivers/virt-pci.c index c08066633023..22c4d87c9c15 100644 --- a/arch/um/drivers/virt-pci.c +++ b/arch/um/drivers/virt-pci.c @@ -616,7 +616,7 @@ static void um_pci_virtio_remove(struct virtio_device *vdev) int i; /* Stop all virtqueues */ - vdev->config->reset(vdev); + virtio_reset_device(vdev); vdev->config->del_vqs(vdev); device_set_wakeup_enable(&vdev->dev, false); diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c index 6ae38776e30e..644c6cddad6d 100644 --- a/drivers/block/virtio_blk.c +++ b/drivers/block/virtio_blk.c @@ -977,7 +977,7 @@ static void virtblk_remove(struct virtio_device *vdev) mutex_lock(&vblk->vdev_mutex); /* Stop all the virtqueues. */ - vdev->config->reset(vdev); + virtio_reset_device(vdev); /* Virtqueues are stopped, nothing can use vblk->vdev anymore. */ vblk->vdev = NULL; @@ -996,7 +996,7 @@ static int virtblk_freeze(struct virtio_device *vdev) struct virtio_blk *vblk = vdev->priv; /* Ensure we don't receive any more interrupts */ - vdev->config->reset(vdev); + virtio_reset_device(vdev); /* Make sure no work handler is accessing the device. */ flush_work(&vblk->config_work); diff --git a/drivers/bluetooth/virtio_bt.c b/drivers/bluetooth/virtio_bt.c index 57908ce4fae8..24a9258962fa 100644 --- a/drivers/bluetooth/virtio_bt.c +++ b/drivers/bluetooth/virtio_bt.c @@ -364,7 +364,7 @@ static void virtbt_remove(struct virtio_device *vdev) struct hci_dev *hdev = vbt->hdev; hci_unregister_dev(hdev); - vdev->config->reset(vdev); + virtio_reset_device(vdev); hci_free_dev(hdev); vbt->hdev = NULL; diff --git a/drivers/char/hw_random/virtio-rng.c b/drivers/char/hw_random/virtio-rng.c index 0a7dde135db1..b2bf78b25630 100644 --- a/drivers/char/hw_random/virtio-rng.c +++ b/drivers/char/hw_random/virtio-rng.c @@ -179,7 +179,7 @@ static void remove_common(struct virtio_device *vdev) vi->data_avail = 0; vi->data_idx = 0; complete(&vi->have_data); - vdev->config->reset(vdev); + virtio_reset_device(vdev); if (vi->hwrng_register_done) hwrng_unregister(&vi->hwrng); vdev->config->del_vqs(vdev); diff --git a/drivers/char/virtio_console.c b/drivers/char/virtio_console.c index 660c5c388c29..2359889a35a0 100644 --- a/drivers/char/virtio_console.c +++ b/drivers/char/virtio_console.c @@ -1958,7 +1958,7 @@ static void virtcons_remove(struct virtio_device *vdev) spin_unlock_irq(&pdrvdata_lock); /* Disable interrupts for vqs */ - vdev->config->reset(vdev); + virtio_reset_device(vdev); /* Finish up work that's lined up */ if (use_multiport(portdev)) cancel_work_sync(&portdev->control_work); @@ -2148,7 +2148,7 @@ static int virtcons_freeze(struct virtio_device *vdev) portdev = vdev->priv; - vdev->config->reset(vdev); + virtio_reset_device(vdev); if (use_multiport(portdev)) virtqueue_disable_cb(portdev->c_ivq); diff --git a/drivers/crypto/virtio/virtio_crypto_core.c b/drivers/crypto/virtio/virtio_crypto_core.c index e2375d992308..8e977b7627cb 100644 --- a/drivers/crypto/virtio/virtio_crypto_core.c +++ b/drivers/crypto/virtio/virtio_crypto_core.c @@ -404,7 +404,7 @@ static int virtcrypto_probe(struct virtio_device *vdev) free_engines: virtcrypto_clear_crypto_engines(vcrypto); free_vqs: - vcrypto->vdev->config->reset(vdev); + virtio_reset_device(vdev); virtcrypto_del_vqs(vcrypto); free_dev: virtcrypto_devmgr_rm_dev(vcrypto); @@ -436,7 +436,7 @@ static void virtcrypto_remove(struct virtio_device *vdev) if (virtcrypto_dev_started(vcrypto)) virtcrypto_dev_stop(vcrypto); - vdev->config->reset(vdev); + virtio_reset_device(vdev); virtcrypto_free_unused_reqs(vcrypto); virtcrypto_clear_crypto_engines(vcrypto); virtcrypto_del_vqs(vcrypto); @@ -456,7 +456,7 @@ static int virtcrypto_freeze(struct virtio_device *vdev) { struct virtio_crypto *vcrypto = vdev->priv; - vdev->config->reset(vdev); + virtio_reset_device(vdev); virtcrypto_free_unused_reqs(vcrypto); if (virtcrypto_dev_started(vcrypto)) virtcrypto_dev_stop(vcrypto); @@ -492,7 +492,7 @@ static int virtcrypto_restore(struct virtio_device *vdev) free_engines: virtcrypto_clear_crypto_engines(vcrypto); free_vqs: - vcrypto->vdev->config->reset(vdev); + virtio_reset_device(vdev); virtcrypto_del_vqs(vcrypto); return err; } diff --git a/drivers/firmware/arm_scmi/virtio.c b/drivers/firmware/arm_scmi/virtio.c index 87039c5c03fd..eefcc4146749 100644 --- a/drivers/firmware/arm_scmi/virtio.c +++ b/drivers/firmware/arm_scmi/virtio.c @@ -452,7 +452,7 @@ static void scmi_vio_remove(struct virtio_device *vdev) * outstanding message on any vqueue to be ignored by complete_cb: now * we can just stop processing buffers and destroy the vqueues. */ - vdev->config->reset(vdev); + virtio_reset_device(vdev); vdev->config->del_vqs(vdev); /* Ensure scmi_vdev is visible as NULL */ smp_store_mb(scmi_vdev, NULL); diff --git a/drivers/gpio/gpio-virtio.c b/drivers/gpio/gpio-virtio.c index 9f4941bc5760..fcc5e8c08973 100644 --- a/drivers/gpio/gpio-virtio.c +++ b/drivers/gpio/gpio-virtio.c @@ -450,7 +450,7 @@ static void virtio_gpio_request_vq(struct virtqueue *vq) static void virtio_gpio_free_vqs(struct virtio_device *vdev) { - vdev->config->reset(vdev); + virtio_reset_device(vdev); vdev->config->del_vqs(vdev); } diff --git a/drivers/gpu/drm/virtio/virtgpu_kms.c b/drivers/gpu/drm/virtio/virtgpu_kms.c index 21f410901694..3313b92db531 100644 --- a/drivers/gpu/drm/virtio/virtgpu_kms.c +++ b/drivers/gpu/drm/virtio/virtgpu_kms.c @@ -279,7 +279,7 @@ void virtio_gpu_deinit(struct drm_device *dev) flush_work(&vgdev->ctrlq.dequeue_work); flush_work(&vgdev->cursorq.dequeue_work); flush_work(&vgdev->config_changed_work); - vgdev->vdev->config->reset(vgdev->vdev); + virtio_reset_device(vgdev->vdev); vgdev->vdev->config->del_vqs(vgdev->vdev); } diff --git a/drivers/i2c/busses/i2c-virtio.c b/drivers/i2c/busses/i2c-virtio.c index 41eb0dcc3204..4b9536f50800 100644 --- a/drivers/i2c/busses/i2c-virtio.c +++ b/drivers/i2c/busses/i2c-virtio.c @@ -165,7 +165,7 @@ err_free: static void virtio_i2c_del_vqs(struct virtio_device *vdev) { - vdev->config->reset(vdev); + virtio_reset_device(vdev); vdev->config->del_vqs(vdev); } diff --git a/drivers/iommu/virtio-iommu.c b/drivers/iommu/virtio-iommu.c index 80930ce04a16..1d4e1e7cf175 100644 --- a/drivers/iommu/virtio-iommu.c +++ b/drivers/iommu/virtio-iommu.c @@ -1115,7 +1115,7 @@ static void viommu_remove(struct virtio_device *vdev) iommu_device_unregister(&viommu->iommu); /* Stop all virtqueues */ - vdev->config->reset(vdev); + virtio_reset_device(vdev); vdev->config->del_vqs(vdev); dev_info(&vdev->dev, "device removed\n"); diff --git a/drivers/net/caif/caif_virtio.c b/drivers/net/caif/caif_virtio.c index 91230894692d..444ef6a342f6 100644 --- a/drivers/net/caif/caif_virtio.c +++ b/drivers/net/caif/caif_virtio.c @@ -754,7 +754,7 @@ static void cfv_remove(struct virtio_device *vdev) debugfs_remove_recursive(cfv->debugfs); vringh_kiov_cleanup(&cfv->ctx.riov); - vdev->config->reset(vdev); + virtio_reset_device(vdev); vdev->vringh_config->del_vrhs(cfv->vdev); cfv->vr_rx = NULL; vdev->config->del_vqs(cfv->vdev); diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index b107835242ad..eeed458c794e 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -3310,7 +3310,7 @@ static int virtnet_probe(struct virtio_device *vdev) return 0; free_unregister_netdev: - vi->vdev->config->reset(vdev); + virtio_reset_device(vdev); unregister_netdev(dev); free_failover: @@ -3326,7 +3326,7 @@ free: static void remove_vq_common(struct virtnet_info *vi) { - vi->vdev->config->reset(vi->vdev); + virtio_reset_device(vi->vdev); /* Free unused buffers in both send and recv, if any. */ free_unused_bufs(vi); diff --git a/drivers/net/wireless/mac80211_hwsim.c b/drivers/net/wireless/mac80211_hwsim.c index 23219f3747f8..02d374360bcc 100644 --- a/drivers/net/wireless/mac80211_hwsim.c +++ b/drivers/net/wireless/mac80211_hwsim.c @@ -4498,7 +4498,7 @@ static void remove_vqs(struct virtio_device *vdev) { int i; - vdev->config->reset(vdev); + virtio_reset_device(vdev); for (i = 0; i < ARRAY_SIZE(hwsim_vqs); i++) { struct virtqueue *vq = hwsim_vqs[i]; diff --git a/drivers/nvdimm/virtio_pmem.c b/drivers/nvdimm/virtio_pmem.c index 726c7354d465..995b6cdc67ed 100644 --- a/drivers/nvdimm/virtio_pmem.c +++ b/drivers/nvdimm/virtio_pmem.c @@ -105,7 +105,7 @@ static void virtio_pmem_remove(struct virtio_device *vdev) nvdimm_bus_unregister(nvdimm_bus); vdev->config->del_vqs(vdev); - vdev->config->reset(vdev); + virtio_reset_device(vdev); } static struct virtio_driver virtio_pmem_driver = { diff --git a/drivers/rpmsg/virtio_rpmsg_bus.c b/drivers/rpmsg/virtio_rpmsg_bus.c index 9c112aa65040..4bda40568dc9 100644 --- a/drivers/rpmsg/virtio_rpmsg_bus.c +++ b/drivers/rpmsg/virtio_rpmsg_bus.c @@ -1024,7 +1024,7 @@ static void rpmsg_remove(struct virtio_device *vdev) size_t total_buf_space = vrp->num_bufs * vrp->buf_size; int ret; - vdev->config->reset(vdev); + virtio_reset_device(vdev); ret = device_for_each_child(&vdev->dev, NULL, rpmsg_remove_device); if (ret) diff --git a/drivers/scsi/virtio_scsi.c b/drivers/scsi/virtio_scsi.c index 28e1d98ae102..c616c7171cc4 100644 --- a/drivers/scsi/virtio_scsi.c +++ b/drivers/scsi/virtio_scsi.c @@ -778,7 +778,7 @@ static void virtscsi_init_vq(struct virtio_scsi_vq *virtscsi_vq, static void virtscsi_remove_vqs(struct virtio_device *vdev) { /* Stop all the virtqueues. */ - vdev->config->reset(vdev); + virtio_reset_device(vdev); vdev->config->del_vqs(vdev); } diff --git a/drivers/virtio/virtio.c b/drivers/virtio/virtio.c index 236081afe9a2..00ac9db792a4 100644 --- a/drivers/virtio/virtio.c +++ b/drivers/virtio/virtio.c @@ -204,6 +204,12 @@ int virtio_finalize_features(struct virtio_device *dev) } EXPORT_SYMBOL_GPL(virtio_finalize_features); +void virtio_reset_device(struct virtio_device *dev) +{ + dev->config->reset(dev); +} +EXPORT_SYMBOL_GPL(virtio_reset_device); + static int virtio_dev_probe(struct device *_d) { int err, i; diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c index c22ff0117b46..f4c34a2a6b8e 100644 --- a/drivers/virtio/virtio_balloon.c +++ b/drivers/virtio/virtio_balloon.c @@ -1056,7 +1056,7 @@ static void remove_common(struct virtio_balloon *vb) return_free_pages_to_mm(vb, ULONG_MAX); /* Now we reset the device so we can clean up the queues. */ - vb->vdev->config->reset(vb->vdev); + virtio_reset_device(vb->vdev); vb->vdev->config->del_vqs(vb->vdev); } diff --git a/drivers/virtio/virtio_input.c b/drivers/virtio/virtio_input.c index ce51ae165943..3aa46703872d 100644 --- a/drivers/virtio/virtio_input.c +++ b/drivers/virtio/virtio_input.c @@ -347,7 +347,7 @@ static void virtinput_remove(struct virtio_device *vdev) spin_unlock_irqrestore(&vi->lock, flags); input_unregister_device(vi->idev); - vdev->config->reset(vdev); + virtio_reset_device(vdev); while ((buf = virtqueue_detach_unused_buf(vi->sts)) != NULL) kfree(buf); vdev->config->del_vqs(vdev); diff --git a/drivers/virtio/virtio_mem.c b/drivers/virtio/virtio_mem.c index 96e5a8782769..033fb93ed528 100644 --- a/drivers/virtio/virtio_mem.c +++ b/drivers/virtio/virtio_mem.c @@ -2850,7 +2850,7 @@ static void virtio_mem_remove(struct virtio_device *vdev) virtio_mem_deinit_hotplug(vm); /* reset the device and cleanup the queues */ - vdev->config->reset(vdev); + virtio_reset_device(vdev); vdev->config->del_vqs(vdev); kfree(vm); diff --git a/fs/fuse/virtio_fs.c b/fs/fuse/virtio_fs.c index 4cfa4bc1f579..ca2cac196d73 100644 --- a/fs/fuse/virtio_fs.c +++ b/fs/fuse/virtio_fs.c @@ -895,7 +895,7 @@ static int virtio_fs_probe(struct virtio_device *vdev) return 0; out_vqs: - vdev->config->reset(vdev); + virtio_reset_device(vdev); virtio_fs_cleanup_vqs(vdev, fs); kfree(fs->vqs); @@ -927,7 +927,7 @@ static void virtio_fs_remove(struct virtio_device *vdev) list_del_init(&fs->list); virtio_fs_stop_all_queues(fs); virtio_fs_drain_all_queues_locked(fs); - vdev->config->reset(vdev); + virtio_reset_device(vdev); virtio_fs_cleanup_vqs(vdev, fs); vdev->priv = NULL; diff --git a/include/linux/virtio.h b/include/linux/virtio.h index 41edbc01ffa4..72292a62cd90 100644 --- a/include/linux/virtio.h +++ b/include/linux/virtio.h @@ -138,6 +138,7 @@ int virtio_finalize_features(struct virtio_device *dev); int virtio_device_freeze(struct virtio_device *dev); int virtio_device_restore(struct virtio_device *dev); #endif +void virtio_reset_device(struct virtio_device *dev); size_t virtio_max_dma_size(struct virtio_device *vdev); diff --git a/net/9p/trans_virtio.c b/net/9p/trans_virtio.c index bd5a89c4960d..8ff6d7160677 100644 --- a/net/9p/trans_virtio.c +++ b/net/9p/trans_virtio.c @@ -721,7 +721,7 @@ static void p9_virtio_remove(struct virtio_device *vdev) mutex_unlock(&virtio_9p_lock); - vdev->config->reset(vdev); + virtio_reset_device(vdev); vdev->config->del_vqs(vdev); sysfs_remove_file(&(vdev->dev.kobj), &dev_attr_mount_tag.attr); diff --git a/net/vmw_vsock/virtio_transport.c b/net/vmw_vsock/virtio_transport.c index 4f7c99dfd16c..fb3302fff627 100644 --- a/net/vmw_vsock/virtio_transport.c +++ b/net/vmw_vsock/virtio_transport.c @@ -665,7 +665,7 @@ static void virtio_vsock_remove(struct virtio_device *vdev) vsock_for_each_connected_socket(virtio_vsock_reset_sock); /* Stop all work handlers to make sure no one is accessing the device, - * so we can safely call vdev->config->reset(). + * so we can safely call virtio_reset_device(). */ mutex_lock(&vsock->rx_lock); vsock->rx_run = false; @@ -682,7 +682,7 @@ static void virtio_vsock_remove(struct virtio_device *vdev) /* Flush all device writes and interrupts, device will not use any * more buffers. */ - vdev->config->reset(vdev); + virtio_reset_device(vdev); mutex_lock(&vsock->rx_lock); while ((pkt = virtqueue_detach_unused_buf(vsock->vqs[VSOCK_VQ_RX]))) diff --git a/sound/virtio/virtio_card.c b/sound/virtio/virtio_card.c index 150ab3e37013..e2847c040f75 100644 --- a/sound/virtio/virtio_card.c +++ b/sound/virtio/virtio_card.c @@ -350,7 +350,7 @@ static void virtsnd_remove(struct virtio_device *vdev) snd_card_free(snd->card); vdev->config->del_vqs(vdev); - vdev->config->reset(vdev); + virtio_reset_device(vdev); for (i = 0; snd->substreams && i < snd->nsubstreams; ++i) { struct virtio_pcm_substream *vss = &snd->substreams[i]; @@ -379,7 +379,7 @@ static int virtsnd_freeze(struct virtio_device *vdev) virtsnd_ctl_msg_cancel_all(snd); vdev->config->del_vqs(vdev); - vdev->config->reset(vdev); + virtio_reset_device(vdev); for (i = 0; i < snd->nsubstreams; ++i) cancel_work_sync(&snd->substreams[i].elapsed_period); From 2076207128948b766ef1fe344e0444fc9271a847 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Thu, 25 Nov 2021 13:00:17 -0500 Subject: [PATCH 425/493] hwrng: virtio - unregister device before reset unregister after reset is clearly wrong - device can be used while it's reset. There's an attempt to protect against that using hwrng_removed but it seems racy since access can be in progress when the flag is set. Just unregister, then reset seems simpler and cleaner. NB: we might be able to drop hwrng_removed in a follow-up patch. Signed-off-by: Laurent Vivier Signed-off-by: Michael S. Tsirkin --- drivers/char/hw_random/virtio-rng.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/char/hw_random/virtio-rng.c b/drivers/char/hw_random/virtio-rng.c index b2bf78b25630..e856df7e285c 100644 --- a/drivers/char/hw_random/virtio-rng.c +++ b/drivers/char/hw_random/virtio-rng.c @@ -179,9 +179,9 @@ static void remove_common(struct virtio_device *vdev) vi->data_avail = 0; vi->data_idx = 0; complete(&vi->have_data); - virtio_reset_device(vdev); if (vi->hwrng_register_done) hwrng_unregister(&vi->hwrng); + virtio_reset_device(vdev); vdev->config->del_vqs(vdev); ida_simple_remove(&rng_index_ida, vi->index); kfree(vi); From b4d80c8dda229c830f0ba38c103e5263c2a7bc64 Mon Sep 17 00:00:00 2001 From: Guanjun Date: Mon, 6 Dec 2021 16:48:18 +0800 Subject: [PATCH 426/493] vduse: moving kvfree into caller This free action should be moved into caller 'vduse_ioctl' in concert with the allocation. No functional change. Signed-off-by: Guanjun Link: https://lore.kernel.org/r/1638780498-55571-1-git-send-email-guanjun@linux.alibaba.com Signed-off-by: Michael S. Tsirkin --- drivers/vdpa/vdpa_user/vduse_dev.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/vdpa/vdpa_user/vduse_dev.c b/drivers/vdpa/vdpa_user/vduse_dev.c index eddcb64a910a..f3cc7dde39af 100644 --- a/drivers/vdpa/vdpa_user/vduse_dev.c +++ b/drivers/vdpa/vdpa_user/vduse_dev.c @@ -1357,7 +1357,6 @@ err_domain: err_str: vduse_dev_destroy(dev); err: - kvfree(config_buf); return ret; } @@ -1408,6 +1407,8 @@ static long vduse_ioctl(struct file *file, unsigned int cmd, } config.name[VDUSE_NAME_MAX - 1] = '\0'; ret = vduse_create_dev(&config, buf, control->api_version); + if (ret) + kvfree(buf); break; } case VDUSE_DESTROY_DEV: { From 0f420c383a2bb414ebccedf9289b5b815f1295fe Mon Sep 17 00:00:00 2001 From: Zhu Lingshan Date: Wed, 1 Dec 2021 16:12:55 +0800 Subject: [PATCH 427/493] ifcvf/vDPA: fix misuse virtio-net device config size for blk dev This commit fixes a misuse of virtio-net device config size issue for virtio-block devices. A new member config_size in struct ifcvf_hw is introduced and would be initialized through vdpa_dev_add() to record correct device config size. To be more generic, rename ifcvf_hw.net_config to ifcvf_hw.dev_config, the helpers ifcvf_read/write_net_config() to ifcvf_read/write_dev_config() Signed-off-by: Zhu Lingshan Reported-and-suggested-by: Stefano Garzarella Reviewed-by: Stefano Garzarella Fixes: 6ad31d162a4e ("vDPA/ifcvf: enable Intel C5000X-PL virtio-block for vDPA") Cc: Link: https://lore.kernel.org/r/20211201081255.60187-1-lingshan.zhu@intel.com Signed-off-by: Michael S. Tsirkin --- drivers/vdpa/ifcvf/ifcvf_base.c | 41 +++++++++++++++++++++++++-------- drivers/vdpa/ifcvf/ifcvf_base.h | 9 +++++--- drivers/vdpa/ifcvf/ifcvf_main.c | 24 ++++--------------- 3 files changed, 41 insertions(+), 33 deletions(-) diff --git a/drivers/vdpa/ifcvf/ifcvf_base.c b/drivers/vdpa/ifcvf/ifcvf_base.c index 2808f1ba9f7b..7d41dfe48ade 100644 --- a/drivers/vdpa/ifcvf/ifcvf_base.c +++ b/drivers/vdpa/ifcvf/ifcvf_base.c @@ -143,8 +143,8 @@ int ifcvf_init_hw(struct ifcvf_hw *hw, struct pci_dev *pdev) IFCVF_DBG(pdev, "hw->isr = %p\n", hw->isr); break; case VIRTIO_PCI_CAP_DEVICE_CFG: - hw->net_cfg = get_cap_addr(hw, &cap); - IFCVF_DBG(pdev, "hw->net_cfg = %p\n", hw->net_cfg); + hw->dev_cfg = get_cap_addr(hw, &cap); + IFCVF_DBG(pdev, "hw->dev_cfg = %p\n", hw->dev_cfg); break; } @@ -153,7 +153,7 @@ next: } if (hw->common_cfg == NULL || hw->notify_base == NULL || - hw->isr == NULL || hw->net_cfg == NULL) { + hw->isr == NULL || hw->dev_cfg == NULL) { IFCVF_ERR(pdev, "Incomplete PCI capabilities\n"); return -EIO; } @@ -174,7 +174,7 @@ next: IFCVF_DBG(pdev, "PCI capability mapping: common cfg: %p, notify base: %p\n, isr cfg: %p, device cfg: %p, multiplier: %u\n", hw->common_cfg, hw->notify_base, hw->isr, - hw->net_cfg, hw->notify_off_multiplier); + hw->dev_cfg, hw->notify_off_multiplier); return 0; } @@ -242,33 +242,54 @@ int ifcvf_verify_min_features(struct ifcvf_hw *hw, u64 features) return 0; } -void ifcvf_read_net_config(struct ifcvf_hw *hw, u64 offset, +u32 ifcvf_get_config_size(struct ifcvf_hw *hw) +{ + struct ifcvf_adapter *adapter; + u32 config_size; + + adapter = vf_to_adapter(hw); + switch (hw->dev_type) { + case VIRTIO_ID_NET: + config_size = sizeof(struct virtio_net_config); + break; + case VIRTIO_ID_BLOCK: + config_size = sizeof(struct virtio_blk_config); + break; + default: + config_size = 0; + IFCVF_ERR(adapter->pdev, "VIRTIO ID %u not supported\n", hw->dev_type); + } + + return config_size; +} + +void ifcvf_read_dev_config(struct ifcvf_hw *hw, u64 offset, void *dst, int length) { u8 old_gen, new_gen, *p; int i; - WARN_ON(offset + length > sizeof(struct virtio_net_config)); + WARN_ON(offset + length > hw->config_size); do { old_gen = ifc_ioread8(&hw->common_cfg->config_generation); p = dst; for (i = 0; i < length; i++) - *p++ = ifc_ioread8(hw->net_cfg + offset + i); + *p++ = ifc_ioread8(hw->dev_cfg + offset + i); new_gen = ifc_ioread8(&hw->common_cfg->config_generation); } while (old_gen != new_gen); } -void ifcvf_write_net_config(struct ifcvf_hw *hw, u64 offset, +void ifcvf_write_dev_config(struct ifcvf_hw *hw, u64 offset, const void *src, int length) { const u8 *p; int i; p = src; - WARN_ON(offset + length > sizeof(struct virtio_net_config)); + WARN_ON(offset + length > hw->config_size); for (i = 0; i < length; i++) - ifc_iowrite8(*p++, hw->net_cfg + offset + i); + ifc_iowrite8(*p++, hw->dev_cfg + offset + i); } static void ifcvf_set_features(struct ifcvf_hw *hw, u64 features) diff --git a/drivers/vdpa/ifcvf/ifcvf_base.h b/drivers/vdpa/ifcvf/ifcvf_base.h index 09918af3ecf8..c486873f370a 100644 --- a/drivers/vdpa/ifcvf/ifcvf_base.h +++ b/drivers/vdpa/ifcvf/ifcvf_base.h @@ -71,12 +71,14 @@ struct ifcvf_hw { u64 hw_features; u32 dev_type; struct virtio_pci_common_cfg __iomem *common_cfg; - void __iomem *net_cfg; + void __iomem *dev_cfg; struct vring_info vring[IFCVF_MAX_QUEUES]; void __iomem * const *base; char config_msix_name[256]; struct vdpa_callback config_cb; unsigned int config_irq; + /* virtio-net or virtio-blk device config size */ + u32 config_size; }; struct ifcvf_adapter { @@ -105,9 +107,9 @@ int ifcvf_init_hw(struct ifcvf_hw *hw, struct pci_dev *dev); int ifcvf_start_hw(struct ifcvf_hw *hw); void ifcvf_stop_hw(struct ifcvf_hw *hw); void ifcvf_notify_queue(struct ifcvf_hw *hw, u16 qid); -void ifcvf_read_net_config(struct ifcvf_hw *hw, u64 offset, +void ifcvf_read_dev_config(struct ifcvf_hw *hw, u64 offset, void *dst, int length); -void ifcvf_write_net_config(struct ifcvf_hw *hw, u64 offset, +void ifcvf_write_dev_config(struct ifcvf_hw *hw, u64 offset, const void *src, int length); u8 ifcvf_get_status(struct ifcvf_hw *hw); void ifcvf_set_status(struct ifcvf_hw *hw, u8 status); @@ -120,4 +122,5 @@ u16 ifcvf_get_vq_state(struct ifcvf_hw *hw, u16 qid); int ifcvf_set_vq_state(struct ifcvf_hw *hw, u16 qid, u16 num); struct ifcvf_adapter *vf_to_adapter(struct ifcvf_hw *hw); int ifcvf_probed_virtio_net(struct ifcvf_hw *hw); +u32 ifcvf_get_config_size(struct ifcvf_hw *hw); #endif /* _IFCVF_H_ */ diff --git a/drivers/vdpa/ifcvf/ifcvf_main.c b/drivers/vdpa/ifcvf/ifcvf_main.c index 6dc75ca70b37..92ba7126e5d6 100644 --- a/drivers/vdpa/ifcvf/ifcvf_main.c +++ b/drivers/vdpa/ifcvf/ifcvf_main.c @@ -366,24 +366,9 @@ static u32 ifcvf_vdpa_get_vq_align(struct vdpa_device *vdpa_dev) static size_t ifcvf_vdpa_get_config_size(struct vdpa_device *vdpa_dev) { - struct ifcvf_adapter *adapter = vdpa_to_adapter(vdpa_dev); struct ifcvf_hw *vf = vdpa_to_vf(vdpa_dev); - struct pci_dev *pdev = adapter->pdev; - size_t size; - switch (vf->dev_type) { - case VIRTIO_ID_NET: - size = sizeof(struct virtio_net_config); - break; - case VIRTIO_ID_BLOCK: - size = sizeof(struct virtio_blk_config); - break; - default: - size = 0; - IFCVF_ERR(pdev, "VIRTIO ID %u not supported\n", vf->dev_type); - } - - return size; + return vf->config_size; } static void ifcvf_vdpa_get_config(struct vdpa_device *vdpa_dev, @@ -392,8 +377,7 @@ static void ifcvf_vdpa_get_config(struct vdpa_device *vdpa_dev, { struct ifcvf_hw *vf = vdpa_to_vf(vdpa_dev); - WARN_ON(offset + len > sizeof(struct virtio_net_config)); - ifcvf_read_net_config(vf, offset, buf, len); + ifcvf_read_dev_config(vf, offset, buf, len); } static void ifcvf_vdpa_set_config(struct vdpa_device *vdpa_dev, @@ -402,8 +386,7 @@ static void ifcvf_vdpa_set_config(struct vdpa_device *vdpa_dev, { struct ifcvf_hw *vf = vdpa_to_vf(vdpa_dev); - WARN_ON(offset + len > sizeof(struct virtio_net_config)); - ifcvf_write_net_config(vf, offset, buf, len); + ifcvf_write_dev_config(vf, offset, buf, len); } static void ifcvf_vdpa_set_config_cb(struct vdpa_device *vdpa_dev, @@ -542,6 +525,7 @@ static int ifcvf_vdpa_dev_add(struct vdpa_mgmt_dev *mdev, const char *name, vf->vring[i].irq = -EINVAL; vf->hw_features = ifcvf_get_hw_features(vf); + vf->config_size = ifcvf_get_config_size(vf); adapter->vdpa.mdev = &ifcvf_mgmt_dev->mdev; ret = _vdpa_register_device(&adapter->vdpa, vf->nr_vring); From 9c25cdeb5f3ca9ef0d683ee8c29b7cb61a174165 Mon Sep 17 00:00:00 2001 From: Stefano Garzarella Date: Fri, 26 Nov 2021 17:47:52 +0100 Subject: [PATCH 428/493] docs: document sysfs ABI for vDPA bus Add missing documentation of sysfs ABI for vDPA bus in the new Documentation/ABI/testing/sysfs-bus-vdpa file. Signed-off-by: Stefano Garzarella Link: https://lore.kernel.org/r/20211126164753.181829-2-sgarzare@redhat.com Signed-off-by: Michael S. Tsirkin Acked-by: Jason Wang --- Documentation/ABI/testing/sysfs-bus-vdpa | 37 ++++++++++++++++++++++++ MAINTAINERS | 1 + 2 files changed, 38 insertions(+) create mode 100644 Documentation/ABI/testing/sysfs-bus-vdpa diff --git a/Documentation/ABI/testing/sysfs-bus-vdpa b/Documentation/ABI/testing/sysfs-bus-vdpa new file mode 100644 index 000000000000..4e55761a39df --- /dev/null +++ b/Documentation/ABI/testing/sysfs-bus-vdpa @@ -0,0 +1,37 @@ +What: /sys/bus/vdpa/driver_autoprobe +Date: March 2020 +Contact: virtualization@lists.linux-foundation.org +Description: + This file determines whether new devices are immediately bound + to a driver after the creation. It initially contains 1, which + means the kernel automatically binds devices to a compatible + driver immediately after they are created. + + Writing "0" to this file disable this feature, any other string + enable it. + +What: /sys/bus/vdpa/driver_probe +Date: March 2020 +Contact: virtualization@lists.linux-foundation.org +Description: + Writing a device name to this file will cause the kernel binds + devices to a compatible driver. + + This can be useful when /sys/bus/vdpa/driver_autoprobe is + disabled. + +What: /sys/bus/vdpa/drivers/.../bind +Date: March 2020 +Contact: virtualization@lists.linux-foundation.org +Description: + Writing a device name to this file will cause the driver to + attempt to bind to the device. This is useful for overriding + default bindings. + +What: /sys/bus/vdpa/drivers/.../unbind +Date: March 2020 +Contact: virtualization@lists.linux-foundation.org +Description: + Writing a device name to this file will cause the driver to + attempt to unbind from the device. This may be useful when + overriding default bindings. diff --git a/MAINTAINERS b/MAINTAINERS index fb18ce7168aa..d10667bcc3cf 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -20132,6 +20132,7 @@ M: "Michael S. Tsirkin" M: Jason Wang L: virtualization@lists.linux-foundation.org S: Maintained +F: Documentation/ABI/testing/sysfs-bus-vdpa F: Documentation/devicetree/bindings/virtio/ F: drivers/block/virtio_blk.c F: drivers/crypto/virtio/ From 539fec78edb4e084e7c532affc56cc42d4ceea4b Mon Sep 17 00:00:00 2001 From: Stefano Garzarella Date: Fri, 26 Nov 2021 17:47:53 +0100 Subject: [PATCH 429/493] vdpa: add driver_override support `driver_override` allows to control which of the vDPA bus drivers binds to a vDPA device. If `driver_override` is not set, the previous behaviour is followed: devices use the first vDPA bus driver loaded (unless auto binding is disabled). Tested on Fedora 34 with driverctl(8): $ modprobe virtio-vdpa $ modprobe vhost-vdpa $ modprobe vdpa-sim-net $ vdpa dev add mgmtdev vdpasim_net name dev1 # dev1 is attached to the first vDPA bus driver loaded $ driverctl -b vdpa list-devices dev1 virtio_vdpa $ driverctl -b vdpa set-override dev1 vhost_vdpa $ driverctl -b vdpa list-devices dev1 vhost_vdpa [*] Note: driverctl(8) integrates with udev so the binding is preserved. Suggested-by: Jason Wang Acked-by: Jason Wang Signed-off-by: Stefano Garzarella Link: https://lore.kernel.org/r/20211126164753.181829-3-sgarzare@redhat.com Signed-off-by: Michael S. Tsirkin --- Documentation/ABI/testing/sysfs-bus-vdpa | 20 +++++++ drivers/vdpa/vdpa.c | 74 ++++++++++++++++++++++++ include/linux/vdpa.h | 2 + 3 files changed, 96 insertions(+) diff --git a/Documentation/ABI/testing/sysfs-bus-vdpa b/Documentation/ABI/testing/sysfs-bus-vdpa index 4e55761a39df..28a6111202ba 100644 --- a/Documentation/ABI/testing/sysfs-bus-vdpa +++ b/Documentation/ABI/testing/sysfs-bus-vdpa @@ -35,3 +35,23 @@ Description: Writing a device name to this file will cause the driver to attempt to unbind from the device. This may be useful when overriding default bindings. + +What: /sys/bus/vdpa/devices/.../driver_override +Date: November 2021 +Contact: virtualization@lists.linux-foundation.org +Description: + This file allows the driver for a device to be specified. + When specified, only a driver with a name matching the value + written to driver_override will have an opportunity to bind to + the device. The override is specified by writing a string to the + driver_override file (echo vhost-vdpa > driver_override) and may + be cleared with an empty string (echo > driver_override). + This returns the device to standard matching rules binding. + Writing to driver_override does not automatically unbind the + device from its current driver or make any attempt to + automatically load the specified driver. If no driver with a + matching name is currently loaded in the kernel, the device will + not bind to any driver. This also allows devices to opt-out of + driver binding using a driver_override name such as "none". + Only a single driver may be specified in the override, there is + no support for parsing delimiters. diff --git a/drivers/vdpa/vdpa.c b/drivers/vdpa/vdpa.c index 09bbe53c3ac4..59d0b8bbb79c 100644 --- a/drivers/vdpa/vdpa.c +++ b/drivers/vdpa/vdpa.c @@ -52,8 +52,81 @@ static void vdpa_dev_remove(struct device *d) drv->remove(vdev); } +static int vdpa_dev_match(struct device *dev, struct device_driver *drv) +{ + struct vdpa_device *vdev = dev_to_vdpa(dev); + + /* Check override first, and if set, only use the named driver */ + if (vdev->driver_override) + return strcmp(vdev->driver_override, drv->name) == 0; + + /* Currently devices must be supported by all vDPA bus drivers */ + return 1; +} + +static ssize_t driver_override_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct vdpa_device *vdev = dev_to_vdpa(dev); + const char *driver_override, *old; + char *cp; + + /* We need to keep extra room for a newline */ + if (count >= (PAGE_SIZE - 1)) + return -EINVAL; + + driver_override = kstrndup(buf, count, GFP_KERNEL); + if (!driver_override) + return -ENOMEM; + + cp = strchr(driver_override, '\n'); + if (cp) + *cp = '\0'; + + device_lock(dev); + old = vdev->driver_override; + if (strlen(driver_override)) { + vdev->driver_override = driver_override; + } else { + kfree(driver_override); + vdev->driver_override = NULL; + } + device_unlock(dev); + + kfree(old); + + return count; +} + +static ssize_t driver_override_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct vdpa_device *vdev = dev_to_vdpa(dev); + ssize_t len; + + device_lock(dev); + len = snprintf(buf, PAGE_SIZE, "%s\n", vdev->driver_override); + device_unlock(dev); + + return len; +} +static DEVICE_ATTR_RW(driver_override); + +static struct attribute *vdpa_dev_attrs[] = { + &dev_attr_driver_override.attr, + NULL, +}; + +static const struct attribute_group vdpa_dev_group = { + .attrs = vdpa_dev_attrs, +}; +__ATTRIBUTE_GROUPS(vdpa_dev); + static struct bus_type vdpa_bus = { .name = "vdpa", + .dev_groups = vdpa_dev_groups, + .match = vdpa_dev_match, .probe = vdpa_dev_probe, .remove = vdpa_dev_remove, }; @@ -68,6 +141,7 @@ static void vdpa_release_dev(struct device *d) ida_simple_remove(&vdpa_index_ida, vdev->index); mutex_destroy(&vdev->cf_mutex); + kfree(vdev->driver_override); kfree(vdev); } diff --git a/include/linux/vdpa.h b/include/linux/vdpa.h index c3011ccda430..ae34015b37b7 100644 --- a/include/linux/vdpa.h +++ b/include/linux/vdpa.h @@ -64,6 +64,7 @@ struct vdpa_mgmt_dev; * struct vdpa_device - representation of a vDPA device * @dev: underlying device * @dma_dev: the actual device that is performing DMA + * @driver_override: driver name to force a match * @config: the configuration ops for this device. * @cf_mutex: Protects get and set access to configuration layout. * @index: device index @@ -76,6 +77,7 @@ struct vdpa_mgmt_dev; struct vdpa_device { struct device dev; struct device *dma_dev; + const char *driver_override; const struct vdpa_config_ops *config; struct mutex cf_mutex; /* Protects get/set config */ unsigned int index; From 6639032acc085775452a2c92fdb8a6f601c9c075 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Fri, 26 Nov 2021 14:42:08 +0100 Subject: [PATCH 430/493] virtio-mem: prepare page onlining code for granularity smaller than MAX_ORDER - 1 Let's prepare our page onlining code for subblock size smaller than MAX_ORDER - 1: we'll get called for a MAX_ORDER - 1 page but might have some subblocks in the range plugged and some unplugged. In that case, fallback to subblock granularity to properly only expose the plugged parts to the buddy. Signed-off-by: David Hildenbrand Link: https://lore.kernel.org/r/20211126134209.17332-2-david@redhat.com Signed-off-by: Michael S. Tsirkin Reviewed-by: Zi Yan Reviewed-by: Eric Ren --- drivers/virtio/virtio_mem.c | 86 ++++++++++++++++++++++++++----------- 1 file changed, 62 insertions(+), 24 deletions(-) diff --git a/drivers/virtio/virtio_mem.c b/drivers/virtio/virtio_mem.c index 033fb93ed528..434aba845245 100644 --- a/drivers/virtio/virtio_mem.c +++ b/drivers/virtio/virtio_mem.c @@ -20,6 +20,7 @@ #include #include #include +#include #include @@ -1228,28 +1229,46 @@ static void virtio_mem_fake_offline_cancel_offline(unsigned long pfn, page_ref_inc(pfn_to_page(pfn + i)); } -static void virtio_mem_online_page_cb(struct page *page, unsigned int order) +static void virtio_mem_online_page(struct virtio_mem *vm, + struct page *page, unsigned int order) { - const unsigned long addr = page_to_phys(page); - unsigned long id, sb_id; - struct virtio_mem *vm; + const unsigned long start = page_to_phys(page); + const unsigned long end = start + PFN_PHYS(1 << order); + unsigned long addr, next, id, sb_id, count; bool do_online; - rcu_read_lock(); - list_for_each_entry_rcu(vm, &virtio_mem_devices, next) { - if (!virtio_mem_contains_range(vm, addr, PFN_PHYS(1 << order))) - continue; + /* + * We can get called with any order up to MAX_ORDER - 1. If our + * subblock size is smaller than that and we have a mixture of plugged + * and unplugged subblocks within such a page, we have to process in + * smaller granularity. In that case we'll adjust the order exactly once + * within the loop. + */ + for (addr = start; addr < end; ) { + next = addr + PFN_PHYS(1 << order); if (vm->in_sbm) { - /* - * We exploit here that subblocks have at least - * MAX_ORDER_NR_PAGES size/alignment - so we cannot - * cross subblocks within one call. - */ id = virtio_mem_phys_to_mb_id(addr); sb_id = virtio_mem_phys_to_sb_id(vm, addr); - do_online = virtio_mem_sbm_test_sb_plugged(vm, id, - sb_id, 1); + count = virtio_mem_phys_to_sb_id(vm, next - 1) - sb_id + 1; + + if (virtio_mem_sbm_test_sb_plugged(vm, id, sb_id, count)) { + /* Fully plugged. */ + do_online = true; + } else if (count == 1 || + virtio_mem_sbm_test_sb_unplugged(vm, id, sb_id, count)) { + /* Fully unplugged. */ + do_online = false; + } else { + /* + * Mixture, process sub-blocks instead. This + * will be at least the size of a pageblock. + * We'll run into this case exactly once. + */ + order = ilog2(vm->sbm.sb_size) - PAGE_SHIFT; + do_online = virtio_mem_sbm_test_sb_plugged(vm, id, sb_id, 1); + continue; + } } else { /* * If the whole block is marked fake offline, keep @@ -1260,18 +1279,38 @@ static void virtio_mem_online_page_cb(struct page *page, unsigned int order) VIRTIO_MEM_BBM_BB_FAKE_OFFLINE; } + if (do_online) + generic_online_page(pfn_to_page(PFN_DOWN(addr)), order); + else + virtio_mem_set_fake_offline(PFN_DOWN(addr), 1 << order, + false); + addr = next; + } +} + +static void virtio_mem_online_page_cb(struct page *page, unsigned int order) +{ + const unsigned long addr = page_to_phys(page); + struct virtio_mem *vm; + + rcu_read_lock(); + list_for_each_entry_rcu(vm, &virtio_mem_devices, next) { /* - * virtio_mem_set_fake_offline() might sleep, we don't need - * the device anymore. See virtio_mem_remove() how races + * Pages we're onlining will never cross memory blocks and, + * therefore, not virtio-mem devices. + */ + if (!virtio_mem_contains_range(vm, addr, PFN_PHYS(1 << order))) + continue; + + /* + * virtio_mem_set_fake_offline() might sleep. We can safely + * drop the RCU lock at this point because the device + * cannot go away. See virtio_mem_remove() how races * between memory onlining and device removal are handled. */ rcu_read_unlock(); - if (do_online) - generic_online_page(page, order); - else - virtio_mem_set_fake_offline(PFN_DOWN(addr), 1 << order, - false); + virtio_mem_online_page(vm, page, order); return; } rcu_read_unlock(); @@ -2438,8 +2477,7 @@ static int virtio_mem_init_hotplug(struct virtio_mem *vm) /* * We want subblocks to span at least MAX_ORDER_NR_PAGES and * pageblock_nr_pages pages. This: - * - Simplifies our page onlining code (virtio_mem_online_page_cb) - * and fake page onlining code (virtio_mem_fake_online). + * - Simplifies our fake page onlining code (virtio_mem_fake_online). * - Is required for now for alloc_contig_range() to work reliably - * it doesn't properly handle smaller granularity on ZONE_NORMAL. */ From 57c5a5b304b0d3798b2d3dc75bd6c960d78ee8ff Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Fri, 26 Nov 2021 14:42:09 +0100 Subject: [PATCH 431/493] virtio-mem: prepare fake page onlining code for granularity smaller than MAX_ORDER - 1 Let's prepare our fake page onlining code for subblock size smaller than MAX_ORDER - 1: we might get called for ranges not covering properly aligned MAX_ORDER - 1 pages. We have to detect the order to use dynamically. Signed-off-by: David Hildenbrand Link: https://lore.kernel.org/r/20211126134209.17332-3-david@redhat.com Signed-off-by: Michael S. Tsirkin Reviewed-by: Zi Yan Reviewed-by: Eric Ren --- drivers/virtio/virtio_mem.c | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/drivers/virtio/virtio_mem.c b/drivers/virtio/virtio_mem.c index 434aba845245..a6a78685cfbe 100644 --- a/drivers/virtio/virtio_mem.c +++ b/drivers/virtio/virtio_mem.c @@ -1121,15 +1121,18 @@ static void virtio_mem_clear_fake_offline(unsigned long pfn, */ static void virtio_mem_fake_online(unsigned long pfn, unsigned long nr_pages) { - const unsigned long max_nr_pages = MAX_ORDER_NR_PAGES; + unsigned long order = MAX_ORDER - 1; unsigned long i; /* - * We are always called at least with MAX_ORDER_NR_PAGES - * granularity/alignment (e.g., the way subblocks work). All pages - * inside such a block are alike. + * We might get called for ranges that don't cover properly aligned + * MAX_ORDER - 1 pages; however, we can only online properly aligned + * pages with an order of MAX_ORDER - 1 at maximum. */ - for (i = 0; i < nr_pages; i += max_nr_pages) { + while (!IS_ALIGNED(pfn | nr_pages, 1 << order)) + order--; + + for (i = 0; i < nr_pages; i += 1 << order) { struct page *page = pfn_to_page(pfn + i); /* @@ -1139,14 +1142,12 @@ static void virtio_mem_fake_online(unsigned long pfn, unsigned long nr_pages) * alike. */ if (PageDirty(page)) { - virtio_mem_clear_fake_offline(pfn + i, max_nr_pages, - false); - generic_online_page(page, MAX_ORDER - 1); + virtio_mem_clear_fake_offline(pfn + i, 1 << order, false); + generic_online_page(page, order); } else { - virtio_mem_clear_fake_offline(pfn + i, max_nr_pages, - true); - free_contig_range(pfn + i, max_nr_pages); - adjust_managed_page_count(page, max_nr_pages); + virtio_mem_clear_fake_offline(pfn + i, 1 << order, true); + free_contig_range(pfn + i, 1 << order); + adjust_managed_page_count(page, 1 << order); } } } @@ -2477,7 +2478,6 @@ static int virtio_mem_init_hotplug(struct virtio_mem *vm) /* * We want subblocks to span at least MAX_ORDER_NR_PAGES and * pageblock_nr_pages pages. This: - * - Simplifies our fake page onlining code (virtio_mem_fake_online). * - Is required for now for alloc_contig_range() to work reliably - * it doesn't properly handle smaller granularity on ZONE_NORMAL. */ From 60af39c1f4cc92cc2785ef745c0c97558134d539 Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Wed, 24 Nov 2021 19:09:49 +0200 Subject: [PATCH 432/493] net/mlx5_vdpa: Offer VIRTIO_NET_F_MTU when setting MTU Make sure to offer VIRTIO_NET_F_MTU since we configure the MTU based on what was queried from the device. This allows the virtio driver to allocate large enough buffers based on the reported MTU. Signed-off-by: Eli Cohen Link: https://lore.kernel.org/r/20211124170949.51725-1-elic@nvidia.com Signed-off-by: Michael S. Tsirkin Acked-by: Jason Wang Reviewed-by: Si-Wei Liu --- drivers/vdpa/mlx5/net/mlx5_vnet.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.c b/drivers/vdpa/mlx5/net/mlx5_vnet.c index 63813fbb5f62..d8e69340a25a 100644 --- a/drivers/vdpa/mlx5/net/mlx5_vnet.c +++ b/drivers/vdpa/mlx5/net/mlx5_vnet.c @@ -1895,6 +1895,7 @@ static u64 mlx5_vdpa_get_features(struct vdpa_device *vdev) ndev->mvdev.mlx_features |= BIT_ULL(VIRTIO_NET_F_CTRL_MAC_ADDR); ndev->mvdev.mlx_features |= BIT_ULL(VIRTIO_NET_F_MQ); ndev->mvdev.mlx_features |= BIT_ULL(VIRTIO_NET_F_STATUS); + ndev->mvdev.mlx_features |= BIT_ULL(VIRTIO_NET_F_MTU); print_features(mvdev, ndev->mvdev.mlx_features, false); return ndev->mvdev.mlx_features; From 10aa250b2f7d87d0921c5e991fd49e607050d486 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sun, 7 Nov 2021 17:14:56 +0100 Subject: [PATCH 433/493] eni_vdpa: Simplify 'eni_vdpa_probe()' When 'pcim_enable_device()' is used, some resources become automagically managed. There is no need to call 'pci_free_irq_vectors()' when the driver is removed. The same will already be done by 'pcim_release()'. Signed-off-by: Christophe JAILLET Link: https://lore.kernel.org/r/02045bdcbbb25f79bae4827f66029cfcddc90381.1636301587.git.christophe.jaillet@wanadoo.fr Signed-off-by: Michael S. Tsirkin Acked-by: Jason Wang --- drivers/vdpa/alibaba/eni_vdpa.c | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/drivers/vdpa/alibaba/eni_vdpa.c b/drivers/vdpa/alibaba/eni_vdpa.c index 3f788794571a..fe1b83b5f80d 100644 --- a/drivers/vdpa/alibaba/eni_vdpa.c +++ b/drivers/vdpa/alibaba/eni_vdpa.c @@ -450,11 +450,6 @@ static u16 eni_vdpa_get_num_queues(struct eni_vdpa *eni_vdpa) return num; } -static void eni_vdpa_free_irq_vectors(void *data) -{ - pci_free_irq_vectors(data); -} - static int eni_vdpa_probe(struct pci_dev *pdev, const struct pci_device_id *id) { struct device *dev = &pdev->dev; @@ -488,13 +483,6 @@ static int eni_vdpa_probe(struct pci_dev *pdev, const struct pci_device_id *id) eni_vdpa->vdpa.dma_dev = &pdev->dev; eni_vdpa->queues = eni_vdpa_get_num_queues(eni_vdpa); - ret = devm_add_action_or_reset(dev, eni_vdpa_free_irq_vectors, pdev); - if (ret) { - ENI_ERR(pdev, - "failed for adding devres for freeing irq vectors\n"); - goto err; - } - eni_vdpa->vring = devm_kcalloc(&pdev->dev, eni_vdpa->queues, sizeof(*eni_vdpa->vring), GFP_KERNEL); From 23118b09e6e11a03cb40a86875d5d342257ae1ec Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= Date: Thu, 4 Nov 2021 20:58:33 +0100 Subject: [PATCH 434/493] vdpa: Avoid duplicate call to vp_vdpa get_status MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It has no sense to call get_status twice, since we already have a variable for that. Signed-off-by: Eugenio Pérez Link: https://lore.kernel.org/r/20211104195833.2089796-1-eperezma@redhat.com Signed-off-by: Michael S. Tsirkin Acked-by: Jason Wang Reviewed-by: Stefano Garzarella --- drivers/vhost/vdpa.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c index e3c4f059b21a..ed2aabf07e74 100644 --- a/drivers/vhost/vdpa.c +++ b/drivers/vhost/vdpa.c @@ -170,7 +170,7 @@ static long vhost_vdpa_set_status(struct vhost_vdpa *v, u8 __user *statusp) * Userspace shouldn't remove status bits unless reset the * status to 0. */ - if (status != 0 && (ops->get_status(vdpa) & ~status) != 0) + if (status != 0 && (status_old & ~status) != 0) return -EINVAL; if ((status_old & VIRTIO_CONFIG_S_DRIVER_OK) && !(status & VIRTIO_CONFIG_S_DRIVER_OK)) From 28cc408be72cebb0f3fcc37bc74ab3196d4de726 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= Date: Thu, 4 Nov 2021 20:52:48 +0100 Subject: [PATCH 435/493] vdpa: Mark vdpa_config_ops.get_vq_notification as optional MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Since vhost_vdpa_mmap checks for its existence before calling it. Signed-off-by: Eugenio Pérez Link: https://lore.kernel.org/r/20211104195248.2088904-1-eperezma@redhat.com Signed-off-by: Michael S. Tsirkin Acked-by: Jason Wang Reviewed-by: Stefano Garzarella --- include/linux/vdpa.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/vdpa.h b/include/linux/vdpa.h index ae34015b37b7..2b7db96bb7d3 100644 --- a/include/linux/vdpa.h +++ b/include/linux/vdpa.h @@ -157,7 +157,7 @@ struct vdpa_map_file { * @vdev: vdpa device * @idx: virtqueue index * @state: pointer to returned state (last_avail_idx) - * @get_vq_notification: Get the notification area for a virtqueue + * @get_vq_notification: Get the notification area for a virtqueue (optional) * @vdev: vdpa device * @idx: virtqueue index * Returns the notifcation area From d3e305592d69e21e36b76d24ca3c01971a2d09be Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Wed, 1 Dec 2021 14:25:25 +0100 Subject: [PATCH 436/493] firmware: qemu_fw_cfg: fix NULL-pointer deref on duplicate entries Commit fe3c60684377 ("firmware: Fix a reference count leak.") "fixed" a kobject leak in the file registration helper by properly calling kobject_put() for the entry in case registration of the object fails (e.g. due to a name collision). This would however result in a NULL pointer dereference when the release function tries to remove the never added entry from the fw_cfg_entry_cache list. Fix this by moving the list-removal out of the release function. Note that the offending commit was one of the benign looking umn.edu fixes which was reviewed but not reverted. [1][2] [1] https://lore.kernel.org/r/202105051005.49BFABCE@keescook [2] https://lore.kernel.org/all/YIg7ZOZvS3a8LjSv@kroah.com Fixes: fe3c60684377 ("firmware: Fix a reference count leak.") Cc: stable@vger.kernel.org # 5.8 Cc: Qiushi Wu Cc: Kees Cook Cc: Greg Kroah-Hartman Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20211201132528.30025-2-johan@kernel.org Signed-off-by: Michael S. Tsirkin --- drivers/firmware/qemu_fw_cfg.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/firmware/qemu_fw_cfg.c b/drivers/firmware/qemu_fw_cfg.c index 172c751a4f6c..a9c64ebfc49a 100644 --- a/drivers/firmware/qemu_fw_cfg.c +++ b/drivers/firmware/qemu_fw_cfg.c @@ -388,9 +388,7 @@ static void fw_cfg_sysfs_cache_cleanup(void) struct fw_cfg_sysfs_entry *entry, *next; list_for_each_entry_safe(entry, next, &fw_cfg_entry_cache, list) { - /* will end up invoking fw_cfg_sysfs_cache_delist() - * via each object's release() method (i.e. destructor) - */ + fw_cfg_sysfs_cache_delist(entry); kobject_put(&entry->kobj); } } @@ -448,7 +446,6 @@ static void fw_cfg_sysfs_release_entry(struct kobject *kobj) { struct fw_cfg_sysfs_entry *entry = to_entry(kobj); - fw_cfg_sysfs_cache_delist(entry); kfree(entry); } From 6004e351da50565fb561be85d45151dc9c370023 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Wed, 1 Dec 2021 14:25:26 +0100 Subject: [PATCH 437/493] firmware: qemu_fw_cfg: fix kobject leak in probe error path An initialised kobject must be freed using kobject_put() to avoid leaking associated resources (e.g. the object name). Commit fe3c60684377 ("firmware: Fix a reference count leak.") "fixed" the leak in the first error path of the file registration helper but left the second one unchanged. This "fix" would however result in a NULL pointer dereference due to the release function also removing the never added entry from the fw_cfg_entry_cache list. This has now been addressed. Fix the remaining kobject leak by restoring the common error path and adding the missing kobject_put(). Fixes: 75f3e8e47f38 ("firmware: introduce sysfs driver for QEMU's fw_cfg device") Cc: stable@vger.kernel.org # 4.6 Cc: Gabriel Somlo Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20211201132528.30025-3-johan@kernel.org Signed-off-by: Michael S. Tsirkin --- drivers/firmware/qemu_fw_cfg.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/drivers/firmware/qemu_fw_cfg.c b/drivers/firmware/qemu_fw_cfg.c index a9c64ebfc49a..ccb7ed62452f 100644 --- a/drivers/firmware/qemu_fw_cfg.c +++ b/drivers/firmware/qemu_fw_cfg.c @@ -603,15 +603,13 @@ static int fw_cfg_register_file(const struct fw_cfg_file *f) /* register entry under "/sys/firmware/qemu_fw_cfg/by_key/" */ err = kobject_init_and_add(&entry->kobj, &fw_cfg_sysfs_entry_ktype, fw_cfg_sel_ko, "%d", entry->select); - if (err) { - kobject_put(&entry->kobj); - return err; - } + if (err) + goto err_put_entry; /* add raw binary content access */ err = sysfs_create_bin_file(&entry->kobj, &fw_cfg_sysfs_attr_raw); if (err) - goto err_add_raw; + goto err_del_entry; /* try adding "/sys/firmware/qemu_fw_cfg/by_name/" symlink */ fw_cfg_build_symlink(fw_cfg_fname_kset, &entry->kobj, entry->name); @@ -620,9 +618,10 @@ static int fw_cfg_register_file(const struct fw_cfg_file *f) fw_cfg_sysfs_cache_enlist(entry); return 0; -err_add_raw: +err_del_entry: kobject_del(&entry->kobj); - kfree(entry); +err_put_entry: + kobject_put(&entry->kobj); return err; } From 1b656e9aad7f4886ed466094d1dc5ee4dd900d20 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Wed, 1 Dec 2021 14:25:27 +0100 Subject: [PATCH 438/493] firmware: qemu_fw_cfg: fix sysfs information leak Make sure to always NUL-terminate file names retrieved from the firmware to avoid accessing data beyond the entry slab buffer and exposing it through sysfs in case the firmware data is corrupt. Fixes: 75f3e8e47f38 ("firmware: introduce sysfs driver for QEMU's fw_cfg device") Cc: stable@vger.kernel.org # 4.6 Cc: Gabriel Somlo Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20211201132528.30025-4-johan@kernel.org Signed-off-by: Michael S. Tsirkin --- drivers/firmware/qemu_fw_cfg.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/firmware/qemu_fw_cfg.c b/drivers/firmware/qemu_fw_cfg.c index ccb7ed62452f..f08e056ed0ae 100644 --- a/drivers/firmware/qemu_fw_cfg.c +++ b/drivers/firmware/qemu_fw_cfg.c @@ -598,7 +598,7 @@ static int fw_cfg_register_file(const struct fw_cfg_file *f) /* set file entry information */ entry->size = be32_to_cpu(f->size); entry->select = be16_to_cpu(f->select); - memcpy(entry->name, f->name, FW_CFG_MAX_FILE_PATH); + strscpy(entry->name, f->name, FW_CFG_MAX_FILE_PATH); /* register entry under "/sys/firmware/qemu_fw_cfg/by_key/" */ err = kobject_init_and_add(&entry->kobj, &fw_cfg_sysfs_entry_ktype, From 9f8b4ae2ac7dc5ff6e5dfa723c1ef2bad80a8c68 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Wed, 1 Dec 2021 14:25:28 +0100 Subject: [PATCH 439/493] firmware: qemu_fw_cfg: remove sysfs entries explicitly Explicitly remove the file entries from sysfs before dropping the final reference for symmetry reasons and for consistency with the rest of the driver. Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20211201132528.30025-5-johan@kernel.org Signed-off-by: Michael S. Tsirkin --- drivers/firmware/qemu_fw_cfg.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/firmware/qemu_fw_cfg.c b/drivers/firmware/qemu_fw_cfg.c index f08e056ed0ae..b436342115af 100644 --- a/drivers/firmware/qemu_fw_cfg.c +++ b/drivers/firmware/qemu_fw_cfg.c @@ -389,6 +389,7 @@ static void fw_cfg_sysfs_cache_cleanup(void) list_for_each_entry_safe(entry, next, &fw_cfg_entry_cache, list) { fw_cfg_sysfs_cache_delist(entry); + kobject_del(&entry->kobj); kobject_put(&entry->kobj); } } From 6017599bb25c20b7a68cbb8e7d534bdc1c36b5e4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E7=8E=8B=E8=B4=87?= Date: Thu, 9 Dec 2021 11:29:25 +0800 Subject: [PATCH 440/493] virtio-pci: fix the confusing error message The error message on the failure of pfn check should tell virtio-pci rather than virtio-mmio, just fix it. Signed-off-by: Michael Wang Suggested-by: Michael S. Tsirkin Link: https://lore.kernel.org/r/ae5e154e-ac59-f0fa-a7c7-091a2201f581@linux.alibaba.com Signed-off-by: Michael S. Tsirkin --- drivers/virtio/virtio_pci_legacy.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/virtio/virtio_pci_legacy.c b/drivers/virtio/virtio_pci_legacy.c index b3f8128b7983..34141b9abe27 100644 --- a/drivers/virtio/virtio_pci_legacy.c +++ b/drivers/virtio/virtio_pci_legacy.c @@ -138,7 +138,7 @@ static struct virtqueue *setup_vq(struct virtio_pci_device *vp_dev, q_pfn = virtqueue_get_desc_addr(vq) >> VIRTIO_PCI_QUEUE_ADDR_SHIFT; if (q_pfn >> 32) { dev_err(&vp_dev->pci_dev->dev, - "platform bug: legacy virtio-mmio must not be used with RAM above 0x%llxGB\n", + "platform bug: legacy virtio-pci must not be used with RAM above 0x%llxGB\n", 0x1ULL << (32 + PAGE_SHIFT - 30)); err = -E2BIG; goto out_del_vq; From 2b68224ec61bb66aa2a24ad099b6af65138fa888 Mon Sep 17 00:00:00 2001 From: Dapeng Mi Date: Fri, 10 Dec 2021 15:35:46 +0800 Subject: [PATCH 441/493] virtio: fix a typo in function "vp_modern_remove" comments. Function name "vp_modern_remove" in comments is written to "vp_modern_probe" incorrectly. Change it. Signed-off-by: Dapeng Mi Link: https://lore.kernel.org/r/20211210073546.700783-1-dapeng1.mi@intel.com Signed-off-by: Michael S. Tsirkin Reviewed-by: Stefano Garzarella --- drivers/virtio/virtio_pci_modern_dev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/virtio/virtio_pci_modern_dev.c b/drivers/virtio/virtio_pci_modern_dev.c index e11ed748e661..e8b3ff2b9fbc 100644 --- a/drivers/virtio/virtio_pci_modern_dev.c +++ b/drivers/virtio/virtio_pci_modern_dev.c @@ -345,7 +345,7 @@ err_map_common: EXPORT_SYMBOL_GPL(vp_modern_probe); /* - * vp_modern_probe: remove and cleanup the modern virtio pci device + * vp_modern_remove: remove and cleanup the modern virtio pci device * @mdev: the modern virtio-pci device */ void vp_modern_remove(struct virtio_pci_modern_device *mdev) From cf4a4493ff70874f8af26d75d4346c591c298e89 Mon Sep 17 00:00:00 2001 From: Peng Hao Date: Wed, 22 Dec 2021 09:12:25 +0800 Subject: [PATCH 442/493] virtio/virtio_mem: handle a possible NULL as a memcpy parameter There is a check for vm->sbm.sb_states before, and it should check it here as well. Signed-off-by: Peng Hao Link: https://lore.kernel.org/r/20211222011225.40573-1-flyingpeng@tencent.com Signed-off-by: Michael S. Tsirkin Fixes: 5f1f79bbc9e2 ("virtio-mem: Paravirtualized memory hotplug") Cc: stable@vger.kernel.org # v5.8+ --- drivers/virtio/virtio_mem.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/virtio/virtio_mem.c b/drivers/virtio/virtio_mem.c index a6a78685cfbe..38becd8d578c 100644 --- a/drivers/virtio/virtio_mem.c +++ b/drivers/virtio/virtio_mem.c @@ -593,7 +593,7 @@ static int virtio_mem_sbm_sb_states_prepare_next_mb(struct virtio_mem *vm) return -ENOMEM; mutex_lock(&vm->hotplug_mutex); - if (new_bitmap) + if (vm->sbm.sb_states) memcpy(new_bitmap, vm->sbm.sb_states, old_pages * PAGE_SIZE); old_bitmap = vm->sbm.sb_states; From 49814ce9e21a8b6d6158b2e502fe6bacef4d99b6 Mon Sep 17 00:00:00 2001 From: Peng Hao Date: Wed, 22 Dec 2021 19:20:14 +0800 Subject: [PATCH 443/493] virtio/virtio_pci_legacy_dev: ensure the correct return value When pci_iomap return NULL, the return value is zero. Signed-off-by: Peng Hao Link: https://lore.kernel.org/r/20211222112014.87394-1-flyingpeng@tencent.com Signed-off-by: Michael S. Tsirkin Acked-by: Jason Wang --- drivers/virtio/virtio_pci_legacy_dev.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/virtio/virtio_pci_legacy_dev.c b/drivers/virtio/virtio_pci_legacy_dev.c index 9b97680dd02b..677d1f68bc9b 100644 --- a/drivers/virtio/virtio_pci_legacy_dev.c +++ b/drivers/virtio/virtio_pci_legacy_dev.c @@ -45,8 +45,10 @@ int vp_legacy_probe(struct virtio_pci_legacy_device *ldev) return rc; ldev->ioaddr = pci_iomap(pci_dev, 0, 0); - if (!ldev->ioaddr) + if (!ldev->ioaddr) { + rc = -EIO; goto err_iomap; + } ldev->isr = ldev->ioaddr + VIRTIO_PCI_ISR; From 97143b70aa847f2b0a1f959dde126b76ff7b5376 Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Thu, 30 Dec 2021 16:20:24 +0200 Subject: [PATCH 444/493] vdpa/mlx5: Fix wrong configuration of virtio_version_1_0 Remove overriding of virtio_version_1_0 which forced the virtqueue object to version 1. Fixes: 1a86b377aa21 ("vdpa/mlx5: Add VDPA driver for supported mlx5 devices") Signed-off-by: Eli Cohen Link: https://lore.kernel.org/r/20211230142024.142979-1-elic@nvidia.com Signed-off-by: Michael S. Tsirkin Reviewed-by: Parav Pandit Acked-by: Jason Wang Reviewed-by: Si-Wei Liu --- drivers/vdpa/mlx5/net/mlx5_vnet.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.c b/drivers/vdpa/mlx5/net/mlx5_vnet.c index d8e69340a25a..c104d7699c16 100644 --- a/drivers/vdpa/mlx5/net/mlx5_vnet.c +++ b/drivers/vdpa/mlx5/net/mlx5_vnet.c @@ -876,8 +876,6 @@ static int create_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtque MLX5_SET(virtio_q, vq_ctx, umem_3_id, mvq->umem3.id); MLX5_SET(virtio_q, vq_ctx, umem_3_size, mvq->umem3.size); MLX5_SET(virtio_q, vq_ctx, pd, ndev->mvdev.res.pdn); - if (MLX5_CAP_DEV_VDPA_EMULATION(ndev->mvdev.mdev, eth_frame_offload_type)) - MLX5_SET(virtio_q, vq_ctx, virtio_version_1_0, 1); err = mlx5_cmd_exec(ndev->mvdev.mdev, in, inlen, out, sizeof(out)); if (err) From 080063920777af65105e5953e2851e036376e3ea Mon Sep 17 00:00:00 2001 From: Xianting Tian Date: Tue, 28 Dec 2021 11:09:24 +0800 Subject: [PATCH 445/493] vhost/test: fix memory leak of vhost virtqueues We need free the vqs in .release(), which are allocated in .open(). Signed-off-by: Xianting Tian Link: https://lore.kernel.org/r/20211228030924.3468439-1-xianting.tian@linux.alibaba.com Signed-off-by: Michael S. Tsirkin Acked-by: Jason Wang --- drivers/vhost/test.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/vhost/test.c b/drivers/vhost/test.c index a09dedc79f68..05740cba1cd8 100644 --- a/drivers/vhost/test.c +++ b/drivers/vhost/test.c @@ -166,6 +166,7 @@ static int vhost_test_release(struct inode *inode, struct file *f) /* We do an extra flush before freeing memory, * since jobs can re-queue themselves. */ vhost_test_flush(n); + kfree(n->dev.vqs); kfree(n); return 0; } From 1861ba626ae9b98136f3e504208cdef6b29cd3ec Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Thu, 6 Jan 2022 07:57:46 -0500 Subject: [PATCH 446/493] virtio_ring: mark ring unused on error A recently added error path does not mark ring unused when exiting on OOM, which will lead to BUG on the next entry in debug builds. TODO: refactor code so we have START_USE and END_USE in the same function. Fixes: fc6d70f40b3d ("virtio_ring: check desc == NULL when using indirect with packed") Cc: "Xuan Zhuo" Cc: Jiasheng Jiang Reviewed-by: Xuan Zhuo Signed-off-by: Michael S. Tsirkin --- drivers/virtio/virtio_ring.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c index 028b05d44546..962f1477b1fa 100644 --- a/drivers/virtio/virtio_ring.c +++ b/drivers/virtio/virtio_ring.c @@ -1197,8 +1197,10 @@ static inline int virtqueue_add_packed(struct virtqueue *_vq, if (virtqueue_use_indirect(_vq, total_sg)) { err = virtqueue_add_indirect_packed(vq, sgs, total_sg, out_sgs, in_sgs, data, gfp); - if (err != -ENOMEM) + if (err != -ENOMEM) { + END_USE(vq); return err; + } /* fall back on direct */ } From 870aaff92e959e29d40f9cfdb5ed06ba2fc2dae0 Mon Sep 17 00:00:00 2001 From: Laura Abbott Date: Thu, 6 Jan 2022 08:09:25 -0500 Subject: [PATCH 447/493] vdpa: clean up get_config_size ret value handling The return type of get_config_size is size_t so it makes sense to change the type of the variable holding its result. That said, this already got taken care of (differently, and arguably not as well) by commit 3ed21c1451a1 ("vdpa: check that offsets are within bounds"). The added 'c->off > size' test in that commit will be done as an unsigned comparison on 32-bit (safe due to not being signed). On a 64-bit platform, it will be done as a signed comparison, but in that case the comparison will be done in 64-bit, and 'c->off' being an u32 it will be valid thanks to the extended range (ie both values will be positive in 64 bits). So this was a real bug, but it was already addressed and marked for stable. Signed-off-by: Laura Abbott Reported-by: Luo Likang Signed-off-by: Michael S. Tsirkin --- drivers/vhost/vdpa.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c index ed2aabf07e74..ecfccd687ea0 100644 --- a/drivers/vhost/vdpa.c +++ b/drivers/vhost/vdpa.c @@ -195,7 +195,7 @@ static int vhost_vdpa_config_validate(struct vhost_vdpa *v, struct vhost_vdpa_config *c) { struct vdpa_device *vdpa = v->vdpa; - long size = vdpa->config->get_config_size(vdpa); + size_t size = vdpa->config->get_config_size(vdpa); if (c->len == 0 || c->off > size) return -EINVAL; From a64917bc2e9b1e0aa716b783c4ec879fdd280300 Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Wed, 5 Jan 2022 13:46:33 +0200 Subject: [PATCH 448/493] vdpa: Provide interface to read driver features Provide an interface to read the negotiated features. This is needed when building the netlink message in vdpa_dev_net_config_fill(). Also fix the implementation of vdpa_dev_net_config_fill() to use the negotiated features instead of the device features. To make APIs clearer, make the following name changes to struct vdpa_config_ops so they better describe their operations: get_features -> get_device_features set_features -> set_driver_features Finally, add get_driver_features to return the negotiated features and add implementation to all the upstream drivers. Acked-by: Jason Wang Signed-off-by: Eli Cohen Link: https://lore.kernel.org/r/20220105114646.577224-2-elic@nvidia.com Signed-off-by: Michael S. Tsirkin --- drivers/vdpa/alibaba/eni_vdpa.c | 16 ++++++++++++---- drivers/vdpa/ifcvf/ifcvf_main.c | 16 ++++++++++++---- drivers/vdpa/mlx5/net/mlx5_vnet.c | 16 ++++++++++++---- drivers/vdpa/vdpa.c | 2 +- drivers/vdpa/vdpa_sim/vdpa_sim.c | 21 +++++++++++++++------ drivers/vdpa/vdpa_user/vduse_dev.c | 16 ++++++++++++---- drivers/vdpa/virtio_pci/vp_vdpa.c | 16 ++++++++++++---- drivers/vhost/vdpa.c | 2 +- drivers/virtio/virtio_vdpa.c | 2 +- include/linux/vdpa.h | 14 +++++++++----- 10 files changed, 87 insertions(+), 34 deletions(-) diff --git a/drivers/vdpa/alibaba/eni_vdpa.c b/drivers/vdpa/alibaba/eni_vdpa.c index fe1b83b5f80d..f480d54f308c 100644 --- a/drivers/vdpa/alibaba/eni_vdpa.c +++ b/drivers/vdpa/alibaba/eni_vdpa.c @@ -58,7 +58,7 @@ static struct virtio_pci_legacy_device *vdpa_to_ldev(struct vdpa_device *vdpa) return &eni_vdpa->ldev; } -static u64 eni_vdpa_get_features(struct vdpa_device *vdpa) +static u64 eni_vdpa_get_device_features(struct vdpa_device *vdpa) { struct virtio_pci_legacy_device *ldev = vdpa_to_ldev(vdpa); u64 features = vp_legacy_get_features(ldev); @@ -69,7 +69,7 @@ static u64 eni_vdpa_get_features(struct vdpa_device *vdpa) return features; } -static int eni_vdpa_set_features(struct vdpa_device *vdpa, u64 features) +static int eni_vdpa_set_driver_features(struct vdpa_device *vdpa, u64 features) { struct virtio_pci_legacy_device *ldev = vdpa_to_ldev(vdpa); @@ -84,6 +84,13 @@ static int eni_vdpa_set_features(struct vdpa_device *vdpa, u64 features) return 0; } +static u64 eni_vdpa_get_driver_features(struct vdpa_device *vdpa) +{ + struct virtio_pci_legacy_device *ldev = vdpa_to_ldev(vdpa); + + return vp_legacy_get_driver_features(ldev); +} + static u8 eni_vdpa_get_status(struct vdpa_device *vdpa) { struct virtio_pci_legacy_device *ldev = vdpa_to_ldev(vdpa); @@ -401,8 +408,9 @@ static void eni_vdpa_set_config_cb(struct vdpa_device *vdpa, } static const struct vdpa_config_ops eni_vdpa_ops = { - .get_features = eni_vdpa_get_features, - .set_features = eni_vdpa_set_features, + .get_device_features = eni_vdpa_get_device_features, + .set_driver_features = eni_vdpa_set_driver_features, + .get_driver_features = eni_vdpa_get_driver_features, .get_status = eni_vdpa_get_status, .set_status = eni_vdpa_set_status, .reset = eni_vdpa_reset, diff --git a/drivers/vdpa/ifcvf/ifcvf_main.c b/drivers/vdpa/ifcvf/ifcvf_main.c index 92ba7126e5d6..d1a6b5ab543c 100644 --- a/drivers/vdpa/ifcvf/ifcvf_main.c +++ b/drivers/vdpa/ifcvf/ifcvf_main.c @@ -169,7 +169,7 @@ static struct ifcvf_hw *vdpa_to_vf(struct vdpa_device *vdpa_dev) return &adapter->vf; } -static u64 ifcvf_vdpa_get_features(struct vdpa_device *vdpa_dev) +static u64 ifcvf_vdpa_get_device_features(struct vdpa_device *vdpa_dev) { struct ifcvf_adapter *adapter = vdpa_to_adapter(vdpa_dev); struct ifcvf_hw *vf = vdpa_to_vf(vdpa_dev); @@ -187,7 +187,7 @@ static u64 ifcvf_vdpa_get_features(struct vdpa_device *vdpa_dev) return features; } -static int ifcvf_vdpa_set_features(struct vdpa_device *vdpa_dev, u64 features) +static int ifcvf_vdpa_set_driver_features(struct vdpa_device *vdpa_dev, u64 features) { struct ifcvf_hw *vf = vdpa_to_vf(vdpa_dev); int ret; @@ -201,6 +201,13 @@ static int ifcvf_vdpa_set_features(struct vdpa_device *vdpa_dev, u64 features) return 0; } +static u64 ifcvf_vdpa_get_driver_features(struct vdpa_device *vdpa_dev) +{ + struct ifcvf_hw *vf = vdpa_to_vf(vdpa_dev); + + return vf->req_features; +} + static u8 ifcvf_vdpa_get_status(struct vdpa_device *vdpa_dev) { struct ifcvf_hw *vf = vdpa_to_vf(vdpa_dev); @@ -426,8 +433,9 @@ static struct vdpa_notification_area ifcvf_get_vq_notification(struct vdpa_devic * implemented set_map()/dma_map()/dma_unmap() */ static const struct vdpa_config_ops ifc_vdpa_ops = { - .get_features = ifcvf_vdpa_get_features, - .set_features = ifcvf_vdpa_set_features, + .get_device_features = ifcvf_vdpa_get_device_features, + .set_driver_features = ifcvf_vdpa_set_driver_features, + .get_driver_features = ifcvf_vdpa_get_driver_features, .get_status = ifcvf_vdpa_get_status, .set_status = ifcvf_vdpa_set_status, .reset = ifcvf_vdpa_reset, diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.c b/drivers/vdpa/mlx5/net/mlx5_vnet.c index c104d7699c16..3206e355230c 100644 --- a/drivers/vdpa/mlx5/net/mlx5_vnet.c +++ b/drivers/vdpa/mlx5/net/mlx5_vnet.c @@ -1878,7 +1878,7 @@ static u64 mlx_to_vritio_features(u16 dev_features) return result; } -static u64 mlx5_vdpa_get_features(struct vdpa_device *vdev) +static u64 mlx5_vdpa_get_device_features(struct vdpa_device *vdev) { struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); @@ -1971,7 +1971,7 @@ static void update_cvq_info(struct mlx5_vdpa_dev *mvdev) } } -static int mlx5_vdpa_set_features(struct vdpa_device *vdev, u64 features) +static int mlx5_vdpa_set_driver_features(struct vdpa_device *vdev, u64 features) { struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); @@ -2338,6 +2338,13 @@ static int mlx5_get_vq_irq(struct vdpa_device *vdv, u16 idx) return -EOPNOTSUPP; } +static u64 mlx5_vdpa_get_driver_features(struct vdpa_device *vdev) +{ + struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); + + return mvdev->actual_features; +} + static const struct vdpa_config_ops mlx5_vdpa_ops = { .set_vq_address = mlx5_vdpa_set_vq_address, .set_vq_num = mlx5_vdpa_set_vq_num, @@ -2350,8 +2357,9 @@ static const struct vdpa_config_ops mlx5_vdpa_ops = { .get_vq_notification = mlx5_get_vq_notification, .get_vq_irq = mlx5_get_vq_irq, .get_vq_align = mlx5_vdpa_get_vq_align, - .get_features = mlx5_vdpa_get_features, - .set_features = mlx5_vdpa_set_features, + .get_device_features = mlx5_vdpa_get_device_features, + .set_driver_features = mlx5_vdpa_set_driver_features, + .get_driver_features = mlx5_vdpa_get_driver_features, .set_config_cb = mlx5_vdpa_set_config_cb, .get_vq_num_max = mlx5_vdpa_get_vq_num_max, .get_device_id = mlx5_vdpa_get_device_id, diff --git a/drivers/vdpa/vdpa.c b/drivers/vdpa/vdpa.c index 59d0b8bbb79c..42d71d60d5dc 100644 --- a/drivers/vdpa/vdpa.c +++ b/drivers/vdpa/vdpa.c @@ -808,7 +808,7 @@ static int vdpa_dev_net_config_fill(struct vdpa_device *vdev, struct sk_buff *ms if (nla_put_u16(msg, VDPA_ATTR_DEV_NET_CFG_MTU, val_u16)) return -EMSGSIZE; - features = vdev->config->get_features(vdev); + features = vdev->config->get_driver_features(vdev); return vdpa_dev_net_mq_config_fill(vdev, msg, features, &config); } diff --git a/drivers/vdpa/vdpa_sim/vdpa_sim.c b/drivers/vdpa/vdpa_sim/vdpa_sim.c index 41b0cd17fcba..ddbe142af09a 100644 --- a/drivers/vdpa/vdpa_sim/vdpa_sim.c +++ b/drivers/vdpa/vdpa_sim/vdpa_sim.c @@ -399,14 +399,14 @@ static u32 vdpasim_get_vq_align(struct vdpa_device *vdpa) return VDPASIM_QUEUE_ALIGN; } -static u64 vdpasim_get_features(struct vdpa_device *vdpa) +static u64 vdpasim_get_device_features(struct vdpa_device *vdpa) { struct vdpasim *vdpasim = vdpa_to_sim(vdpa); return vdpasim->dev_attr.supported_features; } -static int vdpasim_set_features(struct vdpa_device *vdpa, u64 features) +static int vdpasim_set_driver_features(struct vdpa_device *vdpa, u64 features) { struct vdpasim *vdpasim = vdpa_to_sim(vdpa); @@ -419,6 +419,13 @@ static int vdpasim_set_features(struct vdpa_device *vdpa, u64 features) return 0; } +static u64 vdpasim_get_driver_features(struct vdpa_device *vdpa) +{ + struct vdpasim *vdpasim = vdpa_to_sim(vdpa); + + return vdpasim->features; +} + static void vdpasim_set_config_cb(struct vdpa_device *vdpa, struct vdpa_callback *cb) { @@ -613,8 +620,9 @@ static const struct vdpa_config_ops vdpasim_config_ops = { .set_vq_state = vdpasim_set_vq_state, .get_vq_state = vdpasim_get_vq_state, .get_vq_align = vdpasim_get_vq_align, - .get_features = vdpasim_get_features, - .set_features = vdpasim_set_features, + .get_device_features = vdpasim_get_device_features, + .set_driver_features = vdpasim_set_driver_features, + .get_driver_features = vdpasim_get_driver_features, .set_config_cb = vdpasim_set_config_cb, .get_vq_num_max = vdpasim_get_vq_num_max, .get_device_id = vdpasim_get_device_id, @@ -642,8 +650,9 @@ static const struct vdpa_config_ops vdpasim_batch_config_ops = { .set_vq_state = vdpasim_set_vq_state, .get_vq_state = vdpasim_get_vq_state, .get_vq_align = vdpasim_get_vq_align, - .get_features = vdpasim_get_features, - .set_features = vdpasim_set_features, + .get_device_features = vdpasim_get_device_features, + .set_driver_features = vdpasim_set_driver_features, + .get_driver_features = vdpasim_get_driver_features, .set_config_cb = vdpasim_set_config_cb, .get_vq_num_max = vdpasim_get_vq_num_max, .get_device_id = vdpasim_get_device_id, diff --git a/drivers/vdpa/vdpa_user/vduse_dev.c b/drivers/vdpa/vdpa_user/vduse_dev.c index f3cc7dde39af..f85d1a08ed87 100644 --- a/drivers/vdpa/vdpa_user/vduse_dev.c +++ b/drivers/vdpa/vdpa_user/vduse_dev.c @@ -573,14 +573,14 @@ static u32 vduse_vdpa_get_vq_align(struct vdpa_device *vdpa) return dev->vq_align; } -static u64 vduse_vdpa_get_features(struct vdpa_device *vdpa) +static u64 vduse_vdpa_get_device_features(struct vdpa_device *vdpa) { struct vduse_dev *dev = vdpa_to_vduse(vdpa); return dev->device_features; } -static int vduse_vdpa_set_features(struct vdpa_device *vdpa, u64 features) +static int vduse_vdpa_set_driver_features(struct vdpa_device *vdpa, u64 features) { struct vduse_dev *dev = vdpa_to_vduse(vdpa); @@ -588,6 +588,13 @@ static int vduse_vdpa_set_features(struct vdpa_device *vdpa, u64 features) return 0; } +static u64 vduse_vdpa_get_driver_features(struct vdpa_device *vdpa) +{ + struct vduse_dev *dev = vdpa_to_vduse(vdpa); + + return dev->driver_features; +} + static void vduse_vdpa_set_config_cb(struct vdpa_device *vdpa, struct vdpa_callback *cb) { @@ -721,8 +728,9 @@ static const struct vdpa_config_ops vduse_vdpa_config_ops = { .set_vq_state = vduse_vdpa_set_vq_state, .get_vq_state = vduse_vdpa_get_vq_state, .get_vq_align = vduse_vdpa_get_vq_align, - .get_features = vduse_vdpa_get_features, - .set_features = vduse_vdpa_set_features, + .get_device_features = vduse_vdpa_get_device_features, + .set_driver_features = vduse_vdpa_set_driver_features, + .get_driver_features = vduse_vdpa_get_driver_features, .set_config_cb = vduse_vdpa_set_config_cb, .get_vq_num_max = vduse_vdpa_get_vq_num_max, .get_device_id = vduse_vdpa_get_device_id, diff --git a/drivers/vdpa/virtio_pci/vp_vdpa.c b/drivers/vdpa/virtio_pci/vp_vdpa.c index e3ff7875e123..a57e381e830b 100644 --- a/drivers/vdpa/virtio_pci/vp_vdpa.c +++ b/drivers/vdpa/virtio_pci/vp_vdpa.c @@ -53,14 +53,14 @@ static struct virtio_pci_modern_device *vdpa_to_mdev(struct vdpa_device *vdpa) return &vp_vdpa->mdev; } -static u64 vp_vdpa_get_features(struct vdpa_device *vdpa) +static u64 vp_vdpa_get_device_features(struct vdpa_device *vdpa) { struct virtio_pci_modern_device *mdev = vdpa_to_mdev(vdpa); return vp_modern_get_features(mdev); } -static int vp_vdpa_set_features(struct vdpa_device *vdpa, u64 features) +static int vp_vdpa_set_driver_features(struct vdpa_device *vdpa, u64 features) { struct virtio_pci_modern_device *mdev = vdpa_to_mdev(vdpa); @@ -69,6 +69,13 @@ static int vp_vdpa_set_features(struct vdpa_device *vdpa, u64 features) return 0; } +static u64 vp_vdpa_get_driver_features(struct vdpa_device *vdpa) +{ + struct virtio_pci_modern_device *mdev = vdpa_to_mdev(vdpa); + + return vp_modern_get_driver_features(mdev); +} + static u8 vp_vdpa_get_status(struct vdpa_device *vdpa) { struct virtio_pci_modern_device *mdev = vdpa_to_mdev(vdpa); @@ -415,8 +422,9 @@ vp_vdpa_get_vq_notification(struct vdpa_device *vdpa, u16 qid) } static const struct vdpa_config_ops vp_vdpa_ops = { - .get_features = vp_vdpa_get_features, - .set_features = vp_vdpa_set_features, + .get_device_features = vp_vdpa_get_device_features, + .set_driver_features = vp_vdpa_set_driver_features, + .get_driver_features = vp_vdpa_get_driver_features, .get_status = vp_vdpa_get_status, .set_status = vp_vdpa_set_status, .reset = vp_vdpa_reset, diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c index ecfccd687ea0..a8fa7fc6db1e 100644 --- a/drivers/vhost/vdpa.c +++ b/drivers/vhost/vdpa.c @@ -262,7 +262,7 @@ static long vhost_vdpa_get_features(struct vhost_vdpa *v, u64 __user *featurep) const struct vdpa_config_ops *ops = vdpa->config; u64 features; - features = ops->get_features(vdpa); + features = ops->get_device_features(vdpa); if (copy_to_user(featurep, &features, sizeof(features))) return -EFAULT; diff --git a/drivers/virtio/virtio_vdpa.c b/drivers/virtio/virtio_vdpa.c index f85f860bc10b..a84b04ba3195 100644 --- a/drivers/virtio/virtio_vdpa.c +++ b/drivers/virtio/virtio_vdpa.c @@ -308,7 +308,7 @@ static u64 virtio_vdpa_get_features(struct virtio_device *vdev) struct vdpa_device *vdpa = vd_get_vdpa(vdev); const struct vdpa_config_ops *ops = vdpa->config; - return ops->get_features(vdpa); + return ops->get_device_features(vdpa); } static int virtio_vdpa_finalize_features(struct virtio_device *vdev) diff --git a/include/linux/vdpa.h b/include/linux/vdpa.h index 2b7db96bb7d3..9cc4291a79b3 100644 --- a/include/linux/vdpa.h +++ b/include/linux/vdpa.h @@ -171,14 +171,17 @@ struct vdpa_map_file { * for the device * @vdev: vdpa device * Returns virtqueue algin requirement - * @get_features: Get virtio features supported by the device + * @get_device_features: Get virtio features supported by the device * @vdev: vdpa device * Returns the virtio features support by the * device - * @set_features: Set virtio features supported by the driver + * @set_driver_features: Set virtio features supported by the driver * @vdev: vdpa device * @features: feature support by the driver * Returns integer: success (0) or error (< 0) + * @get_driver_features: Get the virtio driver features in action + * @vdev: vdpa device + * Returns the virtio features accepted * @set_config_cb: Set the config interrupt callback * @vdev: vdpa device * @cb: virtio-vdev interrupt callback structure @@ -278,8 +281,9 @@ struct vdpa_config_ops { /* Device ops */ u32 (*get_vq_align)(struct vdpa_device *vdev); - u64 (*get_features)(struct vdpa_device *vdev); - int (*set_features)(struct vdpa_device *vdev, u64 features); + u64 (*get_device_features)(struct vdpa_device *vdev); + int (*set_driver_features)(struct vdpa_device *vdev, u64 features); + u64 (*get_driver_features)(struct vdpa_device *vdev); void (*set_config_cb)(struct vdpa_device *vdev, struct vdpa_callback *cb); u16 (*get_vq_num_max)(struct vdpa_device *vdev); @@ -397,7 +401,7 @@ static inline int vdpa_set_features(struct vdpa_device *vdev, u64 features) const struct vdpa_config_ops *ops = vdev->config; vdev->features_valid = true; - return ops->set_features(vdev, features); + return ops->set_driver_features(vdev, features); } void vdpa_get_config(struct vdpa_device *vdev, unsigned int offset, From a7f46ba42485394edf9836969e220878f4908465 Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Wed, 5 Jan 2022 13:46:34 +0200 Subject: [PATCH 449/493] vdpa/mlx5: Distribute RX virtqueues in RQT object Distribute the available rx virtqueues amongst the available RQT entries. RQTs require to have a power of two entries. When creating or modifying the RQT, use the lowest number of power of two entries that is not less than the number of rx virtqueues. Distribute them in the available entries such that some virtqueus may be referenced twice. This allows to configure any number of virtqueue pairs when multiqueue is used. Reviewed-by: Si-Wei Liu Acked-by: Jason Wang Signed-off-by: Eli Cohen Link: https://lore.kernel.org/r/20220105114646.577224-3-elic@nvidia.com Signed-off-by: Michael S. Tsirkin --- drivers/vdpa/mlx5/net/mlx5_vnet.c | 30 +++++++----------------------- 1 file changed, 7 insertions(+), 23 deletions(-) diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.c b/drivers/vdpa/mlx5/net/mlx5_vnet.c index 3206e355230c..8178936b1f3e 100644 --- a/drivers/vdpa/mlx5/net/mlx5_vnet.c +++ b/drivers/vdpa/mlx5/net/mlx5_vnet.c @@ -1259,17 +1259,10 @@ static int create_rqt(struct mlx5_vdpa_net *ndev) MLX5_SET(rqtc, rqtc, list_q_type, MLX5_RQTC_LIST_Q_TYPE_VIRTIO_NET_Q); MLX5_SET(rqtc, rqtc, rqt_max_size, max_rqt); list = MLX5_ADDR_OF(rqtc, rqtc, rq_num[0]); - for (i = 0, j = 0; j < max_rqt; j++) { - if (!ndev->vqs[j].initialized) - continue; - - if (!vq_is_tx(ndev->vqs[j].index)) { - list[i] = cpu_to_be32(ndev->vqs[j].virtq_id); - i++; - } - } - MLX5_SET(rqtc, rqtc, rqt_actual_size, i); + for (i = 0, j = 0; i < max_rqt; i++, j += 2) + list[i] = cpu_to_be32(ndev->vqs[j % ndev->mvdev.max_vqs].virtq_id); + MLX5_SET(rqtc, rqtc, rqt_actual_size, max_rqt); err = mlx5_vdpa_create_rqt(&ndev->mvdev, in, inlen, &ndev->res.rqtn); kfree(in); if (err) @@ -1290,7 +1283,7 @@ static int modify_rqt(struct mlx5_vdpa_net *ndev, int num) int i, j; int err; - max_rqt = min_t(int, ndev->cur_num_vqs / 2, + max_rqt = min_t(int, roundup_pow_of_two(ndev->cur_num_vqs / 2), 1 << MLX5_CAP_GEN(ndev->mvdev.mdev, log_max_rqt_size)); if (max_rqt < 1) return -EOPNOTSUPP; @@ -1306,16 +1299,10 @@ static int modify_rqt(struct mlx5_vdpa_net *ndev, int num) MLX5_SET(rqtc, rqtc, list_q_type, MLX5_RQTC_LIST_Q_TYPE_VIRTIO_NET_Q); list = MLX5_ADDR_OF(rqtc, rqtc, rq_num[0]); - for (i = 0, j = 0; j < num; j++) { - if (!ndev->vqs[j].initialized) - continue; + for (i = 0, j = 0; i < max_rqt; i++, j += 2) + list[i] = cpu_to_be32(ndev->vqs[j % num].virtq_id); - if (!vq_is_tx(ndev->vqs[j].index)) { - list[i] = cpu_to_be32(ndev->vqs[j].virtq_id); - i++; - } - } - MLX5_SET(rqtc, rqtc, rqt_actual_size, i); + MLX5_SET(rqtc, rqtc, rqt_actual_size, max_rqt); err = mlx5_vdpa_modify_rqt(&ndev->mvdev, in, inlen, ndev->res.rqtn); kfree(in); if (err) @@ -1579,9 +1566,6 @@ static virtio_net_ctrl_ack handle_ctrl_mq(struct mlx5_vdpa_dev *mvdev, u8 cmd) break; } - if (newqps & (newqps - 1)) - break; - if (!change_num_qps(mvdev, newqps)) status = VIRTIO_NET_OK; From 73bc0dbb591baea322a7319c735e5f6c7dba9cfb Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Wed, 5 Jan 2022 13:46:35 +0200 Subject: [PATCH 450/493] vdpa: Sync calls set/get config/status with cf_mutex Add wrappers to get/set status and protect these operations with cf_mutex to serialize these operations with respect to get/set config operations. Signed-off-by: Eli Cohen Link: https://lore.kernel.org/r/20220105114646.577224-4-elic@nvidia.com Signed-off-by: Michael S. Tsirkin --- drivers/vdpa/vdpa.c | 19 +++++++++++++++++++ drivers/vhost/vdpa.c | 7 +++---- drivers/virtio/virtio_vdpa.c | 3 +-- include/linux/vdpa.h | 3 +++ 4 files changed, 26 insertions(+), 6 deletions(-) diff --git a/drivers/vdpa/vdpa.c b/drivers/vdpa/vdpa.c index 42d71d60d5dc..5134c83c4a22 100644 --- a/drivers/vdpa/vdpa.c +++ b/drivers/vdpa/vdpa.c @@ -21,6 +21,25 @@ static LIST_HEAD(mdev_head); static DEFINE_MUTEX(vdpa_dev_mutex); static DEFINE_IDA(vdpa_index_ida); +u8 vdpa_get_status(struct vdpa_device *vdev) +{ + u8 status; + + mutex_lock(&vdev->cf_mutex); + status = vdev->config->get_status(vdev); + mutex_unlock(&vdev->cf_mutex); + return status; +} +EXPORT_SYMBOL(vdpa_get_status); + +void vdpa_set_status(struct vdpa_device *vdev, u8 status) +{ + mutex_lock(&vdev->cf_mutex); + vdev->config->set_status(vdev, status); + mutex_unlock(&vdev->cf_mutex); +} +EXPORT_SYMBOL(vdpa_set_status); + static struct genl_family vdpa_nl_family; static int vdpa_dev_probe(struct device *d) diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c index a8fa7fc6db1e..293c51fdf9ab 100644 --- a/drivers/vhost/vdpa.c +++ b/drivers/vhost/vdpa.c @@ -142,10 +142,9 @@ static long vhost_vdpa_get_device_id(struct vhost_vdpa *v, u8 __user *argp) static long vhost_vdpa_get_status(struct vhost_vdpa *v, u8 __user *statusp) { struct vdpa_device *vdpa = v->vdpa; - const struct vdpa_config_ops *ops = vdpa->config; u8 status; - status = ops->get_status(vdpa); + status = vdpa_get_status(vdpa); if (copy_to_user(statusp, &status, sizeof(status))) return -EFAULT; @@ -164,7 +163,7 @@ static long vhost_vdpa_set_status(struct vhost_vdpa *v, u8 __user *statusp) if (copy_from_user(&status, statusp, sizeof(status))) return -EFAULT; - status_old = ops->get_status(vdpa); + status_old = vdpa_get_status(vdpa); /* * Userspace shouldn't remove status bits unless reset the @@ -182,7 +181,7 @@ static long vhost_vdpa_set_status(struct vhost_vdpa *v, u8 __user *statusp) if (ret) return ret; } else - ops->set_status(vdpa, status); + vdpa_set_status(vdpa, status); if ((status & VIRTIO_CONFIG_S_DRIVER_OK) && !(status_old & VIRTIO_CONFIG_S_DRIVER_OK)) for (i = 0; i < nvqs; i++) diff --git a/drivers/virtio/virtio_vdpa.c b/drivers/virtio/virtio_vdpa.c index a84b04ba3195..76504559bc25 100644 --- a/drivers/virtio/virtio_vdpa.c +++ b/drivers/virtio/virtio_vdpa.c @@ -91,9 +91,8 @@ static u8 virtio_vdpa_get_status(struct virtio_device *vdev) static void virtio_vdpa_set_status(struct virtio_device *vdev, u8 status) { struct vdpa_device *vdpa = vd_get_vdpa(vdev); - const struct vdpa_config_ops *ops = vdpa->config; - return ops->set_status(vdpa, status); + return vdpa_set_status(vdpa, status); } static void virtio_vdpa_reset(struct virtio_device *vdev) diff --git a/include/linux/vdpa.h b/include/linux/vdpa.h index 9cc4291a79b3..ae047fae2603 100644 --- a/include/linux/vdpa.h +++ b/include/linux/vdpa.h @@ -408,6 +408,9 @@ void vdpa_get_config(struct vdpa_device *vdev, unsigned int offset, void *buf, unsigned int len); void vdpa_set_config(struct vdpa_device *dev, unsigned int offset, const void *buf, unsigned int length); +u8 vdpa_get_status(struct vdpa_device *vdev); +void vdpa_set_status(struct vdpa_device *vdev, u8 status); + /** * struct vdpa_mgmtdev_ops - vdpa device ops * @dev_add: Add a vdpa device using alloc and register From 30ef7a8ac8a07046b9ac9206d3a732a9f76b2e60 Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Wed, 5 Jan 2022 13:46:36 +0200 Subject: [PATCH 451/493] vdpa: Read device configuration only if FEATURES_OK Avoid reading device configuration during feature negotiation. Read device status and verify that VIRTIO_CONFIG_S_FEATURES_OK is set. Protect the entire operation, including configuration read with cf_mutex to ensure integrity of the results. Signed-off-by: Eli Cohen Link: https://lore.kernel.org/r/20220105114646.577224-5-elic@nvidia.com Signed-off-by: Michael S. Tsirkin Acked-by: Jason Wang --- drivers/vdpa/vdpa.c | 45 +++++++++++++++++++++++++++++++++------------ 1 file changed, 33 insertions(+), 12 deletions(-) diff --git a/drivers/vdpa/vdpa.c b/drivers/vdpa/vdpa.c index 5134c83c4a22..4494325cae91 100644 --- a/drivers/vdpa/vdpa.c +++ b/drivers/vdpa/vdpa.c @@ -393,6 +393,21 @@ void vdpa_mgmtdev_unregister(struct vdpa_mgmt_dev *mdev) } EXPORT_SYMBOL_GPL(vdpa_mgmtdev_unregister); +static void vdpa_get_config_unlocked(struct vdpa_device *vdev, + unsigned int offset, + void *buf, unsigned int len) +{ + const struct vdpa_config_ops *ops = vdev->config; + + /* + * Config accesses aren't supposed to trigger before features are set. + * If it does happen we assume a legacy guest. + */ + if (!vdev->features_valid) + vdpa_set_features(vdev, 0); + ops->get_config(vdev, offset, buf, len); +} + /** * vdpa_get_config - Get one or more device configuration fields. * @vdev: vdpa device to operate on @@ -403,16 +418,8 @@ EXPORT_SYMBOL_GPL(vdpa_mgmtdev_unregister); void vdpa_get_config(struct vdpa_device *vdev, unsigned int offset, void *buf, unsigned int len) { - const struct vdpa_config_ops *ops = vdev->config; - mutex_lock(&vdev->cf_mutex); - /* - * Config accesses aren't supposed to trigger before features are set. - * If it does happen we assume a legacy guest. - */ - if (!vdev->features_valid) - vdpa_set_features(vdev, 0); - ops->get_config(vdev, offset, buf, len); + vdpa_get_config_unlocked(vdev, offset, buf, len); mutex_unlock(&vdev->cf_mutex); } EXPORT_SYMBOL_GPL(vdpa_get_config); @@ -813,7 +820,7 @@ static int vdpa_dev_net_config_fill(struct vdpa_device *vdev, struct sk_buff *ms u64 features; u16 val_u16; - vdpa_get_config(vdev, 0, &config, sizeof(config)); + vdpa_get_config_unlocked(vdev, 0, &config, sizeof(config)); if (nla_put(msg, VDPA_ATTR_DEV_NET_CFG_MACADDR, sizeof(config.mac), config.mac)) @@ -838,12 +845,23 @@ vdpa_dev_config_fill(struct vdpa_device *vdev, struct sk_buff *msg, u32 portid, { u32 device_id; void *hdr; + u8 status; int err; + mutex_lock(&vdev->cf_mutex); + status = vdev->config->get_status(vdev); + if (!(status & VIRTIO_CONFIG_S_FEATURES_OK)) { + NL_SET_ERR_MSG_MOD(extack, "Features negotiation not completed"); + err = -EAGAIN; + goto out; + } + hdr = genlmsg_put(msg, portid, seq, &vdpa_nl_family, flags, VDPA_CMD_DEV_CONFIG_GET); - if (!hdr) - return -EMSGSIZE; + if (!hdr) { + err = -EMSGSIZE; + goto out; + } if (nla_put_string(msg, VDPA_ATTR_DEV_NAME, dev_name(&vdev->dev))) { err = -EMSGSIZE; @@ -867,11 +885,14 @@ vdpa_dev_config_fill(struct vdpa_device *vdev, struct sk_buff *msg, u32 portid, if (err) goto msg_err; + mutex_unlock(&vdev->cf_mutex); genlmsg_end(msg, hdr); return 0; msg_err: genlmsg_cancel(msg, hdr); +out: + mutex_unlock(&vdev->cf_mutex); return err; } From aba21aff772b8622e08f07219069be793429a48f Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Wed, 5 Jan 2022 13:46:37 +0200 Subject: [PATCH 452/493] vdpa: Allow to configure max data virtqueues Add netlink support to configure the max virtqueue pairs for a device. At least one pair is required. The maximum is dictated by the device. Example: $ vdpa dev add name vdpa-a mgmtdev auxiliary/mlx5_core.sf.1 max_vqp 4 Signed-off-by: Eli Cohen Link: https://lore.kernel.org/r/20220105114646.577224-6-elic@nvidia.com Signed-off-by: Michael S. Tsirkin --- drivers/vdpa/vdpa.c | 15 +++++++++++++-- drivers/vhost/vdpa.c | 2 +- drivers/virtio/virtio_vdpa.c | 2 +- include/linux/vdpa.h | 19 ++++++++++++++++--- 4 files changed, 31 insertions(+), 7 deletions(-) diff --git a/drivers/vdpa/vdpa.c b/drivers/vdpa/vdpa.c index 4494325cae91..96d31b80fdce 100644 --- a/drivers/vdpa/vdpa.c +++ b/drivers/vdpa/vdpa.c @@ -404,7 +404,7 @@ static void vdpa_get_config_unlocked(struct vdpa_device *vdev, * If it does happen we assume a legacy guest. */ if (!vdev->features_valid) - vdpa_set_features(vdev, 0); + vdpa_set_features(vdev, 0, true); ops->get_config(vdev, offset, buf, len); } @@ -581,7 +581,8 @@ out: } #define VDPA_DEV_NET_ATTRS_MASK ((1 << VDPA_ATTR_DEV_NET_CFG_MACADDR) | \ - (1 << VDPA_ATTR_DEV_NET_CFG_MTU)) + (1 << VDPA_ATTR_DEV_NET_CFG_MTU) | \ + (1 << VDPA_ATTR_DEV_NET_CFG_MAX_VQP)) static int vdpa_nl_cmd_dev_add_set_doit(struct sk_buff *skb, struct genl_info *info) { @@ -607,6 +608,16 @@ static int vdpa_nl_cmd_dev_add_set_doit(struct sk_buff *skb, struct genl_info *i nla_get_u16(nl_attrs[VDPA_ATTR_DEV_NET_CFG_MTU]); config.mask |= (1 << VDPA_ATTR_DEV_NET_CFG_MTU); } + if (nl_attrs[VDPA_ATTR_DEV_NET_CFG_MAX_VQP]) { + config.net.max_vq_pairs = + nla_get_u16(nl_attrs[VDPA_ATTR_DEV_NET_CFG_MAX_VQP]); + if (!config.net.max_vq_pairs) { + NL_SET_ERR_MSG_MOD(info->extack, + "At least one pair of VQs is required"); + return -EINVAL; + } + config.mask |= BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MAX_VQP); + } /* Skip checking capability if user didn't prefer to configure any * device networking attributes. It is likely that user might have used diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c index 293c51fdf9ab..6e7edaf2472b 100644 --- a/drivers/vhost/vdpa.c +++ b/drivers/vhost/vdpa.c @@ -285,7 +285,7 @@ static long vhost_vdpa_set_features(struct vhost_vdpa *v, u64 __user *featurep) if (copy_from_user(&features, featurep, sizeof(features))) return -EFAULT; - if (vdpa_set_features(vdpa, features)) + if (vdpa_set_features(vdpa, features, false)) return -EINVAL; return 0; diff --git a/drivers/virtio/virtio_vdpa.c b/drivers/virtio/virtio_vdpa.c index 76504559bc25..7767a7f0119b 100644 --- a/drivers/virtio/virtio_vdpa.c +++ b/drivers/virtio/virtio_vdpa.c @@ -317,7 +317,7 @@ static int virtio_vdpa_finalize_features(struct virtio_device *vdev) /* Give virtio_ring a chance to accept features. */ vring_transport_features(vdev); - return vdpa_set_features(vdpa, vdev->features); + return vdpa_set_features(vdpa, vdev->features, false); } static const char *virtio_vdpa_bus_name(struct virtio_device *vdev) diff --git a/include/linux/vdpa.h b/include/linux/vdpa.h index ae047fae2603..6d4d7e4fe208 100644 --- a/include/linux/vdpa.h +++ b/include/linux/vdpa.h @@ -101,6 +101,7 @@ struct vdpa_dev_set_config { struct { u8 mac[ETH_ALEN]; u16 mtu; + u16 max_vq_pairs; } net; u64 mask; }; @@ -391,17 +392,29 @@ static inline struct device *vdpa_get_dma_dev(struct vdpa_device *vdev) static inline int vdpa_reset(struct vdpa_device *vdev) { const struct vdpa_config_ops *ops = vdev->config; + int ret; + mutex_lock(&vdev->cf_mutex); vdev->features_valid = false; - return ops->reset(vdev); + ret = ops->reset(vdev); + mutex_unlock(&vdev->cf_mutex); + return ret; } -static inline int vdpa_set_features(struct vdpa_device *vdev, u64 features) +static inline int vdpa_set_features(struct vdpa_device *vdev, u64 features, bool locked) { const struct vdpa_config_ops *ops = vdev->config; + int ret; + + if (!locked) + mutex_lock(&vdev->cf_mutex); vdev->features_valid = true; - return ops->set_driver_features(vdev, features); + ret = ops->set_driver_features(vdev, features); + if (!locked) + mutex_unlock(&vdev->cf_mutex); + + return ret; } void vdpa_get_config(struct vdpa_device *vdev, unsigned int offset, From e3137056e6dedee205fccd06da031a285c6e34f5 Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Wed, 5 Jan 2022 13:46:38 +0200 Subject: [PATCH 453/493] vdpa/mlx5: Fix config_attr_mask assignment Fix VDPA_ATTR_DEV_NET_CFG_MACADDR assignment to be explicit 64 bit assignment. No issue was seen since the value is well below 64 bit max value. Nevertheless it needs to be fixed. Fixes: a007d940040c ("vdpa/mlx5: Support configuration of MAC") Reviewed-by: Si-Wei Liu Acked-by: Jason Wang Signed-off-by: Eli Cohen Link: https://lore.kernel.org/r/20220105114646.577224-7-elic@nvidia.com Signed-off-by: Michael S. Tsirkin --- drivers/vdpa/mlx5/net/mlx5_vnet.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.c b/drivers/vdpa/mlx5/net/mlx5_vnet.c index 8178936b1f3e..eaac26772687 100644 --- a/drivers/vdpa/mlx5/net/mlx5_vnet.c +++ b/drivers/vdpa/mlx5/net/mlx5_vnet.c @@ -2667,7 +2667,7 @@ static int mlx5v_probe(struct auxiliary_device *adev, mgtdev->mgtdev.ops = &mdev_ops; mgtdev->mgtdev.device = mdev->device; mgtdev->mgtdev.id_table = id_table; - mgtdev->mgtdev.config_attr_mask = (1 << VDPA_ATTR_DEV_NET_CFG_MACADDR); + mgtdev->mgtdev.config_attr_mask = BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MACADDR); mgtdev->madev = madev; err = vdpa_mgmtdev_register(&mgtdev->mgtdev); From 75560522eaef2f7c7b2fec3c22e1254244f52372 Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Wed, 5 Jan 2022 13:46:39 +0200 Subject: [PATCH 454/493] vdpa/mlx5: Support configuring max data virtqueue Check whether the max number of data virtqueue pairs was provided when a adding a new device and verify the new value does not exceed device capabilities. In addition, change the arrays holding virtqueue and callback contexts to be dynamically allocated. Signed-off-by: Eli Cohen Link: https://lore.kernel.org/r/20220105114646.577224-8-elic@nvidia.com Includes fixup: vdpa/mlx5: fix error handling in mlx5_vdpa_dev_add() Clang build fails with mlx5_vnet.c:2574:6: error: variable 'mvdev' is used uninitialized whenever 'if' condition is true if (!ndev->vqs || !ndev->event_cbs) { ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ mlx5_vnet.c:2660:14: note: uninitialized use occurs here put_device(&mvdev->vdev.dev); ^~~~~ This because mvdev is set after trying to allocate ndev->vqs,event_cbs. So move the allocation to after mvdev is set but before the arrays are used in init_mvqs() Signed-off-by: Tom Rix Link: https://lore.kernel.org/r/20220107211352.3940570-1-trix@redhat.com Signed-off-by: Michael S. Tsirkin Includes fixup: vdpa/mlx5: fix endian-ness for max vqs sparse warnings: (new ones prefixed by >>) >> drivers/vdpa/mlx5/net/mlx5_vnet.c:1247:23: sparse: sparse: cast to restricted __le16 >> drivers/vdpa/mlx5/net/mlx5_vnet.c:1247:23: sparse: sparse: cast from restricted __virtio16 > 1247 num = le16_to_cpu(ndev->config.max_virtqueue_pairs); Address this using the appropriate wrapper. Cc: "Eli Cohen" Reported-by: kernel test robot Signed-off-by: Michael S. Tsirkin Acked-by: Jason Wang Reviewed-by: Eli Cohen --- drivers/vdpa/mlx5/net/mlx5_vnet.c | 54 +++++++++++++++++++++++-------- 1 file changed, 40 insertions(+), 14 deletions(-) diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.c b/drivers/vdpa/mlx5/net/mlx5_vnet.c index eaac26772687..f31de859d175 100644 --- a/drivers/vdpa/mlx5/net/mlx5_vnet.c +++ b/drivers/vdpa/mlx5/net/mlx5_vnet.c @@ -131,11 +131,6 @@ struct mlx5_vdpa_virtqueue { struct mlx5_vq_restore_info ri; }; -/* We will remove this limitation once mlx5_vdpa_alloc_resources() - * provides for driver space allocation - */ -#define MLX5_MAX_SUPPORTED_VQS 16 - static bool is_index_valid(struct mlx5_vdpa_dev *mvdev, u16 idx) { if (unlikely(idx > mvdev->max_idx)) @@ -148,8 +143,8 @@ struct mlx5_vdpa_net { struct mlx5_vdpa_dev mvdev; struct mlx5_vdpa_net_resources res; struct virtio_net_config config; - struct mlx5_vdpa_virtqueue vqs[MLX5_MAX_SUPPORTED_VQS]; - struct vdpa_callback event_cbs[MLX5_MAX_SUPPORTED_VQS + 1]; + struct mlx5_vdpa_virtqueue *vqs; + struct vdpa_callback *event_cbs; /* Serialize vq resources creation and destruction. This is required * since memory map might change and we need to destroy and create @@ -1216,7 +1211,7 @@ static void suspend_vqs(struct mlx5_vdpa_net *ndev) { int i; - for (i = 0; i < MLX5_MAX_SUPPORTED_VQS; i++) + for (i = 0; i < ndev->mvdev.max_vqs; i++) suspend_vq(ndev, &ndev->vqs[i]); } @@ -1242,8 +1237,15 @@ static int create_rqt(struct mlx5_vdpa_net *ndev) void *in; int i, j; int err; + int num; - max_rqt = min_t(int, MLX5_MAX_SUPPORTED_VQS / 2, + if (!(ndev->mvdev.actual_features & BIT_ULL(VIRTIO_NET_F_MQ))) + num = 1; + else + num = mlx5vdpa16_to_cpu(&ndev->mvdev, + ndev->config.max_virtqueue_pairs); + + max_rqt = min_t(int, roundup_pow_of_two(num), 1 << MLX5_CAP_GEN(ndev->mvdev.mdev, log_max_rqt_size)); if (max_rqt < 1) return -EOPNOTSUPP; @@ -1260,7 +1262,7 @@ static int create_rqt(struct mlx5_vdpa_net *ndev) MLX5_SET(rqtc, rqtc, rqt_max_size, max_rqt); list = MLX5_ADDR_OF(rqtc, rqtc, rq_num[0]); for (i = 0, j = 0; i < max_rqt; i++, j += 2) - list[i] = cpu_to_be32(ndev->vqs[j % ndev->mvdev.max_vqs].virtq_id); + list[i] = cpu_to_be32(ndev->vqs[j % (2 * num)].virtq_id); MLX5_SET(rqtc, rqtc, rqt_actual_size, max_rqt); err = mlx5_vdpa_create_rqt(&ndev->mvdev, in, inlen, &ndev->res.rqtn); @@ -2218,7 +2220,7 @@ static int mlx5_vdpa_reset(struct vdpa_device *vdev) clear_vqs_ready(ndev); mlx5_vdpa_destroy_mr(&ndev->mvdev); ndev->mvdev.status = 0; - memset(ndev->event_cbs, 0, sizeof(ndev->event_cbs)); + memset(ndev->event_cbs, 0, sizeof(*ndev->event_cbs) * (mvdev->max_vqs + 1)); ndev->mvdev.actual_features = 0; ++mvdev->generation; if (MLX5_CAP_GEN(mvdev->mdev, umem_uid_0)) { @@ -2291,6 +2293,8 @@ static void mlx5_vdpa_free(struct vdpa_device *vdev) } mlx5_vdpa_free_resources(&ndev->mvdev); mutex_destroy(&ndev->reslock); + kfree(ndev->event_cbs); + kfree(ndev->vqs); } static struct vdpa_notification_area mlx5_get_vq_notification(struct vdpa_device *vdev, u16 idx) @@ -2536,9 +2540,21 @@ static int mlx5_vdpa_dev_add(struct vdpa_mgmt_dev *v_mdev, const char *name, return -EOPNOTSUPP; } - /* we save one virtqueue for control virtqueue should we require it */ max_vqs = MLX5_CAP_DEV_VDPA_EMULATION(mdev, max_num_virtio_queues); - max_vqs = min_t(u32, max_vqs, MLX5_MAX_SUPPORTED_VQS); + if (max_vqs < 2) { + dev_warn(mdev->device, + "%d virtqueues are supported. At least 2 are required\n", + max_vqs); + return -EAGAIN; + } + + if (add_config->mask & BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MAX_VQP)) { + if (add_config->net.max_vq_pairs > max_vqs / 2) + return -EINVAL; + max_vqs = min_t(u32, max_vqs, 2 * add_config->net.max_vq_pairs); + } else { + max_vqs = 2; + } ndev = vdpa_alloc_device(struct mlx5_vdpa_net, mvdev.vdev, mdev->device, &mlx5_vdpa_ops, name, false); @@ -2548,6 +2564,14 @@ static int mlx5_vdpa_dev_add(struct vdpa_mgmt_dev *v_mdev, const char *name, ndev->mvdev.max_vqs = max_vqs; mvdev = &ndev->mvdev; mvdev->mdev = mdev; + + ndev->vqs = kcalloc(max_vqs, sizeof(*ndev->vqs), GFP_KERNEL); + ndev->event_cbs = kcalloc(max_vqs + 1, sizeof(*ndev->event_cbs), GFP_KERNEL); + if (!ndev->vqs || !ndev->event_cbs) { + err = -ENOMEM; + goto err_alloc; + } + init_mvqs(ndev); mutex_init(&ndev->reslock); config = &ndev->config; @@ -2625,6 +2649,7 @@ err_mpfs: mlx5_mpfs_del_mac(pfmdev, config->mac); err_mtu: mutex_destroy(&ndev->reslock); +err_alloc: put_device(&mvdev->vdev.dev); return err; } @@ -2667,7 +2692,8 @@ static int mlx5v_probe(struct auxiliary_device *adev, mgtdev->mgtdev.ops = &mdev_ops; mgtdev->mgtdev.device = mdev->device; mgtdev->mgtdev.id_table = id_table; - mgtdev->mgtdev.config_attr_mask = BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MACADDR); + mgtdev->mgtdev.config_attr_mask = BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MACADDR) | + BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MAX_VQP); mgtdev->madev = madev; err = vdpa_mgmtdev_register(&mgtdev->mgtdev); From 612f330ec56f12c0d099286c45f82d835845f136 Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Wed, 5 Jan 2022 13:46:40 +0200 Subject: [PATCH 455/493] vdpa: Add support for returning device configuration information Add netlink attribute to store the negotiated features. This can be used by userspace to get the current state of the vdpa instance. Examples: $ vdpa dev config show vdpa-a vdpa-a: mac 00:00:00:00:88:88 link up link_announce false max_vq_pairs 16 mtu 1500 negotiated_features CSUM GUEST_CSUM MTU MAC HOST_TSO4 HOST_TSO6 STATUS \ CTRL_VQ MQ CTRL_MAC_ADDR VERSION_1 ACCESS_PLATFORM $ vdpa -j dev config show vdpa-a {"config":{"vdpa-a":{"mac":"00:00:00:00:88:88","link ":"up","link_announce":false, \ "max_vq_pairs":16,"mtu":1500,"negotiated_features":["CSUM","GUEST_CSUM","MTU","MAC", \ "HOST_TSO4","HOST_TSO6","STATUS","CTRL_VQ","MQ","CTRL_MAC_ADDR","VERSION_1", \ "ACCESS_PLATFORM"]}}} $ vdpa -jp dev config show vdpa-a { "config": { "vdpa-a": { "mac": "00:00:00:00:88:88", "link ": "up", "link_announce ": false, "max_vq_pairs": 16, "mtu": 1500, "negotiated_features": [ "CSUM","GUEST_CSUM","MTU","MAC","HOST_TSO4","HOST_TSO6","STATUS","CTRL_VQ","MQ", \ "CTRL_MAC_ADDR","VERSION_1","ACCESS_PLATFORM" ] } } } Signed-off-by: Eli Cohen Link: https://lore.kernel.org/r/20220105114646.577224-9-elic@nvidia.com Signed-off-by: Michael S. Tsirkin Acked-by: Jason Wang --- drivers/vdpa/vdpa.c | 3 +++ include/uapi/linux/vdpa.h | 4 ++++ 2 files changed, 7 insertions(+) diff --git a/drivers/vdpa/vdpa.c b/drivers/vdpa/vdpa.c index 96d31b80fdce..60cf821175fa 100644 --- a/drivers/vdpa/vdpa.c +++ b/drivers/vdpa/vdpa.c @@ -846,6 +846,9 @@ static int vdpa_dev_net_config_fill(struct vdpa_device *vdev, struct sk_buff *ms return -EMSGSIZE; features = vdev->config->get_driver_features(vdev); + if (nla_put_u64_64bit(msg, VDPA_ATTR_DEV_NEGOTIATED_FEATURES, features, + VDPA_ATTR_PAD)) + return -EMSGSIZE; return vdpa_dev_net_mq_config_fill(vdev, msg, features, &config); } diff --git a/include/uapi/linux/vdpa.h b/include/uapi/linux/vdpa.h index a252f06f9dfd..db3738ef3beb 100644 --- a/include/uapi/linux/vdpa.h +++ b/include/uapi/linux/vdpa.h @@ -23,6 +23,9 @@ enum vdpa_command { enum vdpa_attr { VDPA_ATTR_UNSPEC, + /* Pad attribute for 64b alignment */ + VDPA_ATTR_PAD = VDPA_ATTR_UNSPEC, + /* bus name (optional) + dev name together make the parent device handle */ VDPA_ATTR_MGMTDEV_BUS_NAME, /* string */ VDPA_ATTR_MGMTDEV_DEV_NAME, /* string */ @@ -40,6 +43,7 @@ enum vdpa_attr { VDPA_ATTR_DEV_NET_CFG_MAX_VQP, /* u16 */ VDPA_ATTR_DEV_NET_CFG_MTU, /* u16 */ + VDPA_ATTR_DEV_NEGOTIATED_FEATURES, /* u64 */ /* new attributes must be added above here */ VDPA_ATTR_MAX, }; From 37e07e705888e4c3502f204e9c6785c9c2d6d86a Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Wed, 5 Jan 2022 13:46:41 +0200 Subject: [PATCH 456/493] vdpa/mlx5: Restore cur_num_vqs in case of failure in change_num_qps() Restore ndev->cur_num_vqs to the original value in case change_num_qps() fails. Fixes: 52893733f2c5 ("vdpa/mlx5: Add multiqueue support") Reviewed-by: Si-Wei Liu Acked-by: Jason Wang Signed-off-by: Eli Cohen Link: https://lore.kernel.org/r/20220105114646.577224-10-elic@nvidia.com Signed-off-by: Michael S. Tsirkin --- drivers/vdpa/mlx5/net/mlx5_vnet.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.c b/drivers/vdpa/mlx5/net/mlx5_vnet.c index f31de859d175..931109db1517 100644 --- a/drivers/vdpa/mlx5/net/mlx5_vnet.c +++ b/drivers/vdpa/mlx5/net/mlx5_vnet.c @@ -1541,9 +1541,11 @@ static int change_num_qps(struct mlx5_vdpa_dev *mvdev, int newqps) return 0; clean_added: - for (--i; i >= cur_qps; --i) + for (--i; i >= 2 * cur_qps; --i) teardown_vq(ndev, &ndev->vqs[i]); + ndev->cur_num_vqs = 2 * cur_qps; + return err; } From cd2629f6df1cab5b3df34705ae7f3bde6147fce3 Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Wed, 5 Jan 2022 13:46:42 +0200 Subject: [PATCH 457/493] vdpa: Support reporting max device capabilities Add max_supported_vqs and supported_features fields to struct vdpa_mgmt_dev. Upstream drivers need to feel these values according to the device capabilities. These values are reported back in a netlink message when showing management devices. Examples: $ auxiliary/mlx5_core.sf.1: supported_classes net max_supported_vqs 257 dev_features CSUM GUEST_CSUM MTU HOST_TSO4 HOST_TSO6 STATUS CTRL_VQ MQ \ CTRL_MAC_ADDR VERSION_1 ACCESS_PLATFORM $ vdpa -j mgmtdev show {"mgmtdev":{"auxiliary/mlx5_core.sf.1":{"supported_classes":["net"], \ "max_supported_vqs":257,"dev_features":["CSUM","GUEST_CSUM","MTU", \ "HOST_TSO4","HOST_TSO6","STATUS","CTRL_VQ","MQ","CTRL_MAC_ADDR", \ "VERSION_1","ACCESS_PLATFORM"]}}} $ vdpa -jp mgmtdev show { "mgmtdev": { "auxiliary/mlx5_core.sf.1": { "supported_classes": [ "net" ], "max_supported_vqs": 257, "dev_features": ["CSUM","GUEST_CSUM","MTU","HOST_TSO4", \ "HOST_TSO6","STATUS","CTRL_VQ","MQ", \ "CTRL_MAC_ADDR","VERSION_1","ACCESS_PLATFORM"] } } } Signed-off-by: Eli Cohen Link: https://lore.kernel.org/r/20220105114646.577224-11-elic@nvidia.com Signed-off-by: Michael S. Tsirkin Reviewed-by: Si-Wei Liu --- drivers/vdpa/vdpa.c | 10 ++++++++++ include/linux/vdpa.h | 2 ++ include/uapi/linux/vdpa.h | 2 ++ 3 files changed, 14 insertions(+) diff --git a/drivers/vdpa/vdpa.c b/drivers/vdpa/vdpa.c index 60cf821175fa..34fa251db8cc 100644 --- a/drivers/vdpa/vdpa.c +++ b/drivers/vdpa/vdpa.c @@ -514,6 +514,16 @@ static int vdpa_mgmtdev_fill(const struct vdpa_mgmt_dev *mdev, struct sk_buff *m err = -EMSGSIZE; goto msg_err; } + if (nla_put_u32(msg, VDPA_ATTR_DEV_MGMTDEV_MAX_VQS, + mdev->max_supported_vqs)) { + err = -EMSGSIZE; + goto msg_err; + } + if (nla_put_u64_64bit(msg, VDPA_ATTR_DEV_SUPPORTED_FEATURES, + mdev->supported_features, VDPA_ATTR_PAD)) { + err = -EMSGSIZE; + goto msg_err; + } genlmsg_end(msg, hdr); return 0; diff --git a/include/linux/vdpa.h b/include/linux/vdpa.h index 6d4d7e4fe208..a6047fd6cf12 100644 --- a/include/linux/vdpa.h +++ b/include/linux/vdpa.h @@ -460,6 +460,8 @@ struct vdpa_mgmt_dev { const struct virtio_device_id *id_table; u64 config_attr_mask; struct list_head list; + u64 supported_features; + u32 max_supported_vqs; }; int vdpa_mgmtdev_register(struct vdpa_mgmt_dev *mdev); diff --git a/include/uapi/linux/vdpa.h b/include/uapi/linux/vdpa.h index db3738ef3beb..1061d8d2d09d 100644 --- a/include/uapi/linux/vdpa.h +++ b/include/uapi/linux/vdpa.h @@ -44,6 +44,8 @@ enum vdpa_attr { VDPA_ATTR_DEV_NET_CFG_MTU, /* u16 */ VDPA_ATTR_DEV_NEGOTIATED_FEATURES, /* u64 */ + VDPA_ATTR_DEV_MGMTDEV_MAX_VQS, /* u32 */ + VDPA_ATTR_DEV_SUPPORTED_FEATURES, /* u64 */ /* new attributes must be added above here */ VDPA_ATTR_MAX, }; From 79de65edf8891725616f4992cee5d5963900f07a Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Wed, 5 Jan 2022 13:46:43 +0200 Subject: [PATCH 458/493] vdpa/mlx5: Report max device capabilities Configure max supported virtqueues and features on the management device. This info can be retrieved using: $ vdpa mgmtdev show auxiliary/mlx5_core.sf.1: supported_classes net max_supported_vqs 257 dev_features CSUM GUEST_CSUM MTU HOST_TSO4 HOST_TSO6 STATUS CTRL_VQ MQ \ CTRL_MAC_ADDR VERSION_1 ACCESS_PLATFORM Signed-off-by: Eli Cohen Link: https://lore.kernel.org/r/20220105114646.577224-12-elic@nvidia.com Signed-off-by: Michael S. Tsirkin Reviewed-by: Si-Wei Liu --- drivers/vdpa/mlx5/net/mlx5_vnet.c | 35 ++++++++++++++++++++----------- 1 file changed, 23 insertions(+), 12 deletions(-) diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.c b/drivers/vdpa/mlx5/net/mlx5_vnet.c index 931109db1517..d1ff65065fb1 100644 --- a/drivers/vdpa/mlx5/net/mlx5_vnet.c +++ b/drivers/vdpa/mlx5/net/mlx5_vnet.c @@ -1866,22 +1866,29 @@ static u64 mlx_to_vritio_features(u16 dev_features) return result; } +static u64 get_supported_features(struct mlx5_core_dev *mdev) +{ + u64 mlx_vdpa_features = 0; + u16 dev_features; + + dev_features = MLX5_CAP_DEV_VDPA_EMULATION(mdev, device_features_bits_mask); + mlx_vdpa_features |= mlx_to_vritio_features(dev_features); + if (MLX5_CAP_DEV_VDPA_EMULATION(mdev, virtio_version_1_0)) + mlx_vdpa_features |= BIT_ULL(VIRTIO_F_VERSION_1); + mlx_vdpa_features |= BIT_ULL(VIRTIO_F_ACCESS_PLATFORM); + mlx_vdpa_features |= BIT_ULL(VIRTIO_NET_F_CTRL_VQ); + mlx_vdpa_features |= BIT_ULL(VIRTIO_NET_F_CTRL_MAC_ADDR); + mlx_vdpa_features |= BIT_ULL(VIRTIO_NET_F_MQ); + mlx_vdpa_features |= BIT_ULL(VIRTIO_NET_F_STATUS); + mlx_vdpa_features |= BIT_ULL(VIRTIO_NET_F_MTU); + + return mlx_vdpa_features; +} + static u64 mlx5_vdpa_get_device_features(struct vdpa_device *vdev) { struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); - u16 dev_features; - - dev_features = MLX5_CAP_DEV_VDPA_EMULATION(mvdev->mdev, device_features_bits_mask); - ndev->mvdev.mlx_features |= mlx_to_vritio_features(dev_features); - if (MLX5_CAP_DEV_VDPA_EMULATION(mvdev->mdev, virtio_version_1_0)) - ndev->mvdev.mlx_features |= BIT_ULL(VIRTIO_F_VERSION_1); - ndev->mvdev.mlx_features |= BIT_ULL(VIRTIO_F_ACCESS_PLATFORM); - ndev->mvdev.mlx_features |= BIT_ULL(VIRTIO_NET_F_CTRL_VQ); - ndev->mvdev.mlx_features |= BIT_ULL(VIRTIO_NET_F_CTRL_MAC_ADDR); - ndev->mvdev.mlx_features |= BIT_ULL(VIRTIO_NET_F_MQ); - ndev->mvdev.mlx_features |= BIT_ULL(VIRTIO_NET_F_STATUS); - ndev->mvdev.mlx_features |= BIT_ULL(VIRTIO_NET_F_MTU); print_features(mvdev, ndev->mvdev.mlx_features, false); return ndev->mvdev.mlx_features; @@ -2563,6 +2570,7 @@ static int mlx5_vdpa_dev_add(struct vdpa_mgmt_dev *v_mdev, const char *name, if (IS_ERR(ndev)) return PTR_ERR(ndev); + ndev->mvdev.mlx_features = mgtdev->mgtdev.supported_features; ndev->mvdev.max_vqs = max_vqs; mvdev = &ndev->mvdev; mvdev->mdev = mdev; @@ -2696,6 +2704,9 @@ static int mlx5v_probe(struct auxiliary_device *adev, mgtdev->mgtdev.id_table = id_table; mgtdev->mgtdev.config_attr_mask = BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MACADDR) | BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MAX_VQP); + mgtdev->mgtdev.max_supported_vqs = + MLX5_CAP_DEV_VDPA_EMULATION(mdev, max_num_virtio_queues) + 1; + mgtdev->mgtdev.supported_features = get_supported_features(mdev); mgtdev->madev = madev; err = vdpa_mgmtdev_register(&mgtdev->mgtdev); From cbe777e98b3a420a6d66b7ce7780334c48eccc88 Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Wed, 5 Jan 2022 13:46:44 +0200 Subject: [PATCH 459/493] vdpa/vdpa_sim: Configure max supported virtqueues Configure max supported virtqueues on the management device. Signed-off-by: Eli Cohen Link: https://lore.kernel.org/r/20220105114646.577224-13-elic@nvidia.com Signed-off-by: Michael S. Tsirkin Acked-by: Jason Wang --- drivers/vdpa/vdpa_sim/vdpa_sim_net.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/vdpa/vdpa_sim/vdpa_sim_net.c b/drivers/vdpa/vdpa_sim/vdpa_sim_net.c index 76dd24abc791..46aabc73263a 100644 --- a/drivers/vdpa/vdpa_sim/vdpa_sim_net.c +++ b/drivers/vdpa/vdpa_sim/vdpa_sim_net.c @@ -191,6 +191,7 @@ static struct vdpa_mgmt_dev mgmt_dev = { .ops = &vdpasim_net_mgmtdev_ops, .config_attr_mask = (1 << VDPA_ATTR_DEV_NET_CFG_MACADDR | 1 << VDPA_ATTR_DEV_NET_CFG_MTU), + .max_supported_vqs = VDPASIM_NET_VQ_NUM, }; static int __init vdpasim_net_init(void) From 47a1401ac95f95936148b813843ee51d516921ea Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Wed, 5 Jan 2022 13:46:45 +0200 Subject: [PATCH 460/493] vdpa: Use BIT_ULL for bit operations All masks in this file are 64 bits. Change BIT to BIT_ULL. Other occurences use (1 << val) which yields a 32 bit value. Change them to use BIT_ULL too. Reviewed-by: Si-Wei Liu Signed-off-by: Eli Cohen Link: https://lore.kernel.org/r/20220105114646.577224-14-elic@nvidia.com Signed-off-by: Michael S. Tsirkin Acked-by: Jason Wang --- drivers/vdpa/vdpa.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/vdpa/vdpa.c b/drivers/vdpa/vdpa.c index 34fa251db8cc..4380367d00b5 100644 --- a/drivers/vdpa/vdpa.c +++ b/drivers/vdpa/vdpa.c @@ -590,9 +590,9 @@ out: return msg->len; } -#define VDPA_DEV_NET_ATTRS_MASK ((1 << VDPA_ATTR_DEV_NET_CFG_MACADDR) | \ - (1 << VDPA_ATTR_DEV_NET_CFG_MTU) | \ - (1 << VDPA_ATTR_DEV_NET_CFG_MAX_VQP)) +#define VDPA_DEV_NET_ATTRS_MASK (BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MACADDR) | \ + BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MTU) | \ + BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MAX_VQP)) static int vdpa_nl_cmd_dev_add_set_doit(struct sk_buff *skb, struct genl_info *info) { @@ -611,12 +611,12 @@ static int vdpa_nl_cmd_dev_add_set_doit(struct sk_buff *skb, struct genl_info *i if (nl_attrs[VDPA_ATTR_DEV_NET_CFG_MACADDR]) { macaddr = nla_data(nl_attrs[VDPA_ATTR_DEV_NET_CFG_MACADDR]); memcpy(config.net.mac, macaddr, sizeof(config.net.mac)); - config.mask |= (1 << VDPA_ATTR_DEV_NET_CFG_MACADDR); + config.mask |= BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MACADDR); } if (nl_attrs[VDPA_ATTR_DEV_NET_CFG_MTU]) { config.net.mtu = nla_get_u16(nl_attrs[VDPA_ATTR_DEV_NET_CFG_MTU]); - config.mask |= (1 << VDPA_ATTR_DEV_NET_CFG_MTU); + config.mask |= BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MTU); } if (nl_attrs[VDPA_ATTR_DEV_NET_CFG_MAX_VQP]) { config.net.max_vq_pairs = @@ -828,7 +828,7 @@ static int vdpa_dev_net_mq_config_fill(struct vdpa_device *vdev, { u16 val_u16; - if ((features & (1ULL << VIRTIO_NET_F_MQ)) == 0) + if ((features & BIT_ULL(VIRTIO_NET_F_MQ)) == 0) return 0; val_u16 = le16_to_cpu(config->max_virtqueue_pairs); From b2ce6197c9c9be0ecc2a636f3b2f35886ce09a98 Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Wed, 5 Jan 2022 13:46:46 +0200 Subject: [PATCH 461/493] vdpa/vdpa_sim_net: Report max device capabilities Configure max supported virtqueues features on the management device. This info can be retrieved using: $ vdpa mgmtdev show vdpasim_net: supported_classes net max_supported_vqs 2 dev_features MAC ANY_LAYOUT VERSION_1 ACCESS_PLATFORM Signed-off-by: Eli Cohen Link: https://lore.kernel.org/r/20220105114646.577224-15-elic@nvidia.com Signed-off-by: Michael S. Tsirkin Acked-by: Jason Wang --- drivers/vdpa/vdpa_sim/vdpa_sim_net.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/vdpa/vdpa_sim/vdpa_sim_net.c b/drivers/vdpa/vdpa_sim/vdpa_sim_net.c index 46aabc73263a..d5324f6fd8c7 100644 --- a/drivers/vdpa/vdpa_sim/vdpa_sim_net.c +++ b/drivers/vdpa/vdpa_sim/vdpa_sim_net.c @@ -192,6 +192,7 @@ static struct vdpa_mgmt_dev mgmt_dev = { .config_attr_mask = (1 << VDPA_ATTR_DEV_NET_CFG_MACADDR | 1 << VDPA_ATTR_DEV_NET_CFG_MTU), .max_supported_vqs = VDPASIM_NET_VQ_NUM, + .supported_features = VDPASIM_NET_FEATURES, }; static int __init vdpasim_net_init(void) From f6d955d80830b6e6f6a170be68cc3628f36365dd Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Tue, 11 Jan 2022 20:33:57 +0200 Subject: [PATCH 462/493] vdpa: Avoid taking cf_mutex lock on get status Avoid the wrapper holding cf_mutex since it is not protecting anything. To avoid confusion and unnecessary overhead incurred by it, remove. Fixes: f489f27bc0ab ("vdpa: Sync calls set/get config/status with cf_mutex") Signed-off-by: Eli Cohen Link: https://lore.kernel.org/r/20220111183400.38418-2-elic@nvidia.com Signed-off-by: Michael S. Tsirkin Reviewed-by: Si-Wei Liu Acked-by: Jason Wang --- drivers/vdpa/vdpa.c | 11 ----------- drivers/vhost/vdpa.c | 5 +++-- include/linux/vdpa.h | 1 - 3 files changed, 3 insertions(+), 14 deletions(-) diff --git a/drivers/vdpa/vdpa.c b/drivers/vdpa/vdpa.c index 4380367d00b5..9846c9de4bfa 100644 --- a/drivers/vdpa/vdpa.c +++ b/drivers/vdpa/vdpa.c @@ -21,17 +21,6 @@ static LIST_HEAD(mdev_head); static DEFINE_MUTEX(vdpa_dev_mutex); static DEFINE_IDA(vdpa_index_ida); -u8 vdpa_get_status(struct vdpa_device *vdev) -{ - u8 status; - - mutex_lock(&vdev->cf_mutex); - status = vdev->config->get_status(vdev); - mutex_unlock(&vdev->cf_mutex); - return status; -} -EXPORT_SYMBOL(vdpa_get_status); - void vdpa_set_status(struct vdpa_device *vdev, u8 status) { mutex_lock(&vdev->cf_mutex); diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c index 6e7edaf2472b..0ed6cbadb52d 100644 --- a/drivers/vhost/vdpa.c +++ b/drivers/vhost/vdpa.c @@ -142,9 +142,10 @@ static long vhost_vdpa_get_device_id(struct vhost_vdpa *v, u8 __user *argp) static long vhost_vdpa_get_status(struct vhost_vdpa *v, u8 __user *statusp) { struct vdpa_device *vdpa = v->vdpa; + const struct vdpa_config_ops *ops = vdpa->config; u8 status; - status = vdpa_get_status(vdpa); + status = ops->get_status(vdpa); if (copy_to_user(statusp, &status, sizeof(status))) return -EFAULT; @@ -163,7 +164,7 @@ static long vhost_vdpa_set_status(struct vhost_vdpa *v, u8 __user *statusp) if (copy_from_user(&status, statusp, sizeof(status))) return -EFAULT; - status_old = vdpa_get_status(vdpa); + status_old = ops->get_status(vdpa); /* * Userspace shouldn't remove status bits unless reset the diff --git a/include/linux/vdpa.h b/include/linux/vdpa.h index a6047fd6cf12..2de442ececae 100644 --- a/include/linux/vdpa.h +++ b/include/linux/vdpa.h @@ -421,7 +421,6 @@ void vdpa_get_config(struct vdpa_device *vdev, unsigned int offset, void *buf, unsigned int len); void vdpa_set_config(struct vdpa_device *dev, unsigned int offset, const void *buf, unsigned int length); -u8 vdpa_get_status(struct vdpa_device *vdev); void vdpa_set_status(struct vdpa_device *vdev, u8 status); /** From 680ab9d69a04cfd9f3f5fedaacbc1974b2959121 Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Tue, 11 Jan 2022 20:33:58 +0200 Subject: [PATCH 463/493] vdpa: Protect vdpa reset with cf_mutex Call reset using the wrapper function vdpa_reset() to make sure the operation is serialized with cf_mutex. This comes to protect from the following possible scenario: vhost_vdpa_set_status() could call the reset op. Since the call is not protected by cf_mutex, a netlink thread calling vdpa_dev_config_fill could get passed the VIRTIO_CONFIG_S_FEATURES_OK check in vdpa_dev_config_fill() and end up reporting wrong features. Fixes: 5f6e85953d8f ("vdpa: Read device configuration only if FEATURES_OK") Signed-off-by: Eli Cohen Link: https://lore.kernel.org/r/20220111183400.38418-3-elic@nvidia.com Signed-off-by: Michael S. Tsirkin Reviewed-by: Si-Wei Liu Acked-by: Jason Wang --- drivers/vhost/vdpa.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c index 0ed6cbadb52d..851539807bc9 100644 --- a/drivers/vhost/vdpa.c +++ b/drivers/vhost/vdpa.c @@ -178,7 +178,7 @@ static long vhost_vdpa_set_status(struct vhost_vdpa *v, u8 __user *statusp) vhost_vdpa_unsetup_vq_irq(v, i); if (status == 0) { - ret = ops->reset(vdpa); + ret = vdpa_reset(vdpa); if (ret) return ret; } else From f8ae3a489b21b05c39a0a1a7734f2a0188852177 Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Tue, 11 Jan 2022 20:33:59 +0200 Subject: [PATCH 464/493] vdpa/mlx5: Fix is_index_valid() to refer to features Make sure the decision whether an index received through a callback is valid or not consults the negotiated features. The motivation for this was due to a case encountered where I shut down the VM. After the reset operation was called features were already clear, I got get_vq_state() call which caused out array bounds access since is_index_valid() reported the index value. So this is more of not hit a bug since the call shouldn't have been made first place. Signed-off-by: Eli Cohen Link: https://lore.kernel.org/r/20220111183400.38418-4-elic@nvidia.com Signed-off-by: Michael S. Tsirkin Reviewed-by: Si-Wei Liu Acked-by: Jason Wang --- drivers/vdpa/mlx5/net/mlx5_vnet.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.c b/drivers/vdpa/mlx5/net/mlx5_vnet.c index d1ff65065fb1..9eacfdb48434 100644 --- a/drivers/vdpa/mlx5/net/mlx5_vnet.c +++ b/drivers/vdpa/mlx5/net/mlx5_vnet.c @@ -133,10 +133,14 @@ struct mlx5_vdpa_virtqueue { static bool is_index_valid(struct mlx5_vdpa_dev *mvdev, u16 idx) { - if (unlikely(idx > mvdev->max_idx)) - return false; + if (!(mvdev->actual_features & BIT_ULL(VIRTIO_NET_F_MQ))) { + if (!(mvdev->actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VQ))) + return idx < 2; + else + return idx < 3; + } - return true; + return idx <= mvdev->max_idx; } struct mlx5_vdpa_net { From b03fc43e73877e180c1803a33aea3e7396642367 Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Tue, 11 Jan 2022 20:34:00 +0200 Subject: [PATCH 465/493] vdpa/mlx5: Fix tracking of current number of VQs Modify the code such that ndev->cur_num_vqs better reflects the actual number of data virtqueues. The value can be accurately realized after features have been negotiated. This is to prevent possible failures when modifying the RQT object if the cur_num_vqs bears invalid value. No issue was actually encountered but this also makes the code more readable. Fixes: c5a5cd3d3217 ("vdpa/mlx5: Support configuring max data virtqueue") Signed-off-by: Eli Cohen Link: https://lore.kernel.org/r/20220111183400.38418-5-elic@nvidia.com Signed-off-by: Michael S. Tsirkin Reviewed-by: Si-Wei Liu Acked-by: Jason Wang --- drivers/vdpa/mlx5/net/mlx5_vnet.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.c b/drivers/vdpa/mlx5/net/mlx5_vnet.c index 9eacfdb48434..b53603d94082 100644 --- a/drivers/vdpa/mlx5/net/mlx5_vnet.c +++ b/drivers/vdpa/mlx5/net/mlx5_vnet.c @@ -1246,8 +1246,7 @@ static int create_rqt(struct mlx5_vdpa_net *ndev) if (!(ndev->mvdev.actual_features & BIT_ULL(VIRTIO_NET_F_MQ))) num = 1; else - num = mlx5vdpa16_to_cpu(&ndev->mvdev, - ndev->config.max_virtqueue_pairs); + num = ndev->cur_num_vqs / 2; max_rqt = min_t(int, roundup_pow_of_two(num), 1 << MLX5_CAP_GEN(ndev->mvdev.mdev, log_max_rqt_size)); @@ -1983,6 +1982,11 @@ static int mlx5_vdpa_set_driver_features(struct vdpa_device *vdev, u64 features) return err; ndev->mvdev.actual_features = features & ndev->mvdev.mlx_features; + if (ndev->mvdev.actual_features & BIT_ULL(VIRTIO_NET_F_MQ)) + ndev->cur_num_vqs = 2 * mlx5vdpa16_to_cpu(mvdev, ndev->config.max_virtqueue_pairs); + else + ndev->cur_num_vqs = 2; + update_cvq_info(mvdev); return err; } @@ -2233,6 +2237,7 @@ static int mlx5_vdpa_reset(struct vdpa_device *vdev) clear_vqs_ready(ndev); mlx5_vdpa_destroy_mr(&ndev->mvdev); ndev->mvdev.status = 0; + ndev->cur_num_vqs = 0; memset(ndev->event_cbs, 0, sizeof(*ndev->event_cbs) * (mvdev->max_vqs + 1)); ndev->mvdev.actual_features = 0; ++mvdev->generation; @@ -2641,9 +2646,8 @@ static int mlx5_vdpa_dev_add(struct vdpa_mgmt_dev *v_mdev, const char *name, ndev->nb.notifier_call = event_handler; mlx5_notifier_register(mdev, &ndev->nb); - ndev->cur_num_vqs = 2 * mlx5_vdpa_max_qps(max_vqs); mvdev->vdev.mdev = &mgtdev->mgtdev; - err = _vdpa_register_device(&mvdev->vdev, ndev->cur_num_vqs + 1); + err = _vdpa_register_device(&mvdev->vdev, 2 * mlx5_vdpa_max_qps(max_vqs) + 1); if (err) goto err_reg; From 6fed105a5640e148032ad37208be280ce8cb6915 Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Sat, 27 Nov 2021 19:09:26 +0100 Subject: [PATCH 466/493] MAINTAINERS: remove Gilles Muller Gilles Muller passed away on November 17, 2021. We would like to thank him for his continued support for the development of Coccinelle. Signed-off-by: Julia Lawall --- MAINTAINERS | 1 - 1 file changed, 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 360e9aa0205d..8431e541f9c6 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -4685,7 +4685,6 @@ F: drivers/media/pci/cobalt/ COCCINELLE/Semantic Patches (SmPL) M: Julia Lawall -M: Gilles Muller M: Nicolas Palix L: cocci@inria.fr (moderated for non-subscribers) S: Supported From 92b2dadaa624d69465dd94ce3d0f30fc2f70170e Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Sat, 27 Nov 2021 19:10:43 +0100 Subject: [PATCH 467/493] scripts/coccinelle: drop bugon.cocci The BUG_ON script was never safe, in that it was not able to check whether the condition was side-effecting. At this point, BUG_ON should be well known, so it has probably outlived its usefuless. Signed-off-by: Julia Lawall Suggested-by: Matthew Wilcox --- scripts/coccinelle/misc/bugon.cocci | 63 ----------------------------- 1 file changed, 63 deletions(-) delete mode 100644 scripts/coccinelle/misc/bugon.cocci diff --git a/scripts/coccinelle/misc/bugon.cocci b/scripts/coccinelle/misc/bugon.cocci deleted file mode 100644 index 8d595c358408..000000000000 --- a/scripts/coccinelle/misc/bugon.cocci +++ /dev/null @@ -1,63 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/// Use BUG_ON instead of a if condition followed by BUG. -/// -//# This makes an effort to find cases where BUG() follows an if -//# condition on an expression and replaces the if condition and BUG() -//# with a BUG_ON having the conditional expression of the if statement -//# as argument. -// -// Confidence: High -// Copyright: (C) 2014 Himangi Saraogi. -// Comments: -// Options: --no-includes --include-headers - -virtual patch -virtual context -virtual org -virtual report - -//---------------------------------------------------------- -// For context mode -//---------------------------------------------------------- - -@depends on context@ -expression e; -@@ - -*if (e) BUG(); - -//---------------------------------------------------------- -// For patch mode -//---------------------------------------------------------- - -@depends on patch@ -expression e; -@@ - --if (e) BUG(); -+BUG_ON(e); - -//---------------------------------------------------------- -// For org and report mode -//---------------------------------------------------------- - -@r depends on (org || report)@ -expression e; -position p; -@@ - - if (e) BUG@p (); - -@script:python depends on org@ -p << r.p; -@@ - -coccilib.org.print_todo(p[0], "WARNING use BUG_ON") - -@script:python depends on report@ -p << r.p; -@@ - -msg="WARNING: Use BUG_ON instead of if condition followed by BUG.\nPlease make sure the condition has no side effects (see conditional BUG_ON definition in include/asm-generic/bug.h)" -coccilib.report.print_report(p[0], msg) - From 3cdb8e995ee2e393b66d2abe156b90475009ec41 Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Sun, 26 Dec 2021 11:36:48 +0100 Subject: [PATCH 468/493] drop fen.cocci This semantic patch does not take into account the fact that of_node_put can be safely applied to NULL. Thus it gives only false positives. Drop it. Reported-by: Qing Wang Signed-off-by: Julia Lawall --- scripts/coccinelle/iterators/fen.cocci | 124 ------------------------- 1 file changed, 124 deletions(-) delete mode 100644 scripts/coccinelle/iterators/fen.cocci diff --git a/scripts/coccinelle/iterators/fen.cocci b/scripts/coccinelle/iterators/fen.cocci deleted file mode 100644 index b69f9665f4fb..000000000000 --- a/scripts/coccinelle/iterators/fen.cocci +++ /dev/null @@ -1,124 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/// These iterators only exit normally when the loop cursor is NULL, so there -/// is no point to call of_node_put on the final value. -/// -// Confidence: High -// Copyright: (C) 2010-2012 Nicolas Palix. -// Copyright: (C) 2010-2012 Julia Lawall, INRIA/LIP6. -// Copyright: (C) 2010-2012 Gilles Muller, INRIA/LiP6. -// URL: http://coccinelle.lip6.fr/ -// Comments: -// Options: --no-includes --include-headers - -virtual patch -virtual context -virtual org -virtual report - -@depends on patch@ -iterator name for_each_node_by_name; -expression np,E; -identifier l; -@@ - -for_each_node_by_name(np,...) { - ... when != break; - when != goto l; -} -... when != np = E -- of_node_put(np); - -@depends on patch@ -iterator name for_each_node_by_type; -expression np,E; -identifier l; -@@ - -for_each_node_by_type(np,...) { - ... when != break; - when != goto l; -} -... when != np = E -- of_node_put(np); - -@depends on patch@ -iterator name for_each_compatible_node; -expression np,E; -identifier l; -@@ - -for_each_compatible_node(np,...) { - ... when != break; - when != goto l; -} -... when != np = E -- of_node_put(np); - -@depends on patch@ -iterator name for_each_matching_node; -expression np,E; -identifier l; -@@ - -for_each_matching_node(np,...) { - ... when != break; - when != goto l; -} -... when != np = E -- of_node_put(np); - -// ---------------------------------------------------------------------- - -@r depends on !patch forall@ -//iterator name for_each_node_by_name; -//iterator name for_each_node_by_type; -//iterator name for_each_compatible_node; -//iterator name for_each_matching_node; -expression np,E; -identifier l; -position p1,p2; -@@ - -( -*for_each_node_by_name@p1(np,...) -{ - ... when != break; - when != goto l; -} -| -*for_each_node_by_type@p1(np,...) -{ - ... when != break; - when != goto l; -} -| -*for_each_compatible_node@p1(np,...) -{ - ... when != break; - when != goto l; -} -| -*for_each_matching_node@p1(np,...) -{ - ... when != break; - when != goto l; -} -) -... when != np = E -* of_node_put@p2(np); - -@script:python depends on org@ -p1 << r.p1; -p2 << r.p2; -@@ - -cocci.print_main("unneeded of_node_put",p2) -cocci.print_secs("iterator",p1) - -@script:python depends on report@ -p1 << r.p1; -p2 << r.p2; -@@ - -msg = "ERROR: of_node_put not needed after iterator on line %s" % (p1[0].line) -coccilib.report.print_report(p2[0], msg) From 3ac5f2f2574a8b9e219bb5872166e5db797e349d Mon Sep 17 00:00:00 2001 From: Yang Li Date: Wed, 5 Jan 2022 17:39:09 +0800 Subject: [PATCH 469/493] cifs: Fix smb311_update_preauth_hash() kernel-doc comment Add the description of @server in smb311_update_preauth_hash() kernel-doc comment to remove warning found by running scripts/kernel-doc, which is caused by using 'make W=1'. fs/cifs/smb2misc.c:856: warning: Function parameter or member 'server' not described in 'smb311_update_preauth_hash' Reported-by: Abaci Robot Signed-off-by: Yang Li Signed-off-by: Steve French --- fs/cifs/smb2misc.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/cifs/smb2misc.c b/fs/cifs/smb2misc.c index 396d5afa7cf1..b25623e3fe3d 100644 --- a/fs/cifs/smb2misc.c +++ b/fs/cifs/smb2misc.c @@ -847,6 +847,7 @@ smb2_handle_cancelled_mid(struct mid_q_entry *mid, struct TCP_Server_Info *serve * SMB2 header. * * @ses: server session structure + * @server: pointer to server info * @iov: array containing the SMB request we will send to the server * @nvec: number of array entries for the iov */ From dea2903719283c156b53741126228c4a1b40440f Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Mon, 10 Jan 2022 19:00:02 -0500 Subject: [PATCH 470/493] cifs: move superblock magic defitions to magic.h Help userland apps to identify cifs and smb2 mounts. Signed-off-by: Jeff Layton Signed-off-by: Steve French --- fs/cifs/cifsfs.c | 3 ++- fs/cifs/cifsglob.h | 2 -- fs/cifs/smb1ops.c | 3 ++- fs/cifs/smb2glob.h | 2 -- fs/cifs/smb2ops.c | 5 +++-- include/uapi/linux/magic.h | 4 ++++ 6 files changed, 11 insertions(+), 8 deletions(-) diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index a62a4305f79d..36b2e0cb9736 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c @@ -26,6 +26,7 @@ #include #include #include +#include #include #include "cifsfs.h" #include "cifspdu.h" @@ -202,7 +203,7 @@ cifs_read_super(struct super_block *sb) sb->s_time_max = ts.tv_sec; } - sb->s_magic = CIFS_MAGIC_NUMBER; + sb->s_magic = CIFS_SUPER_MAGIC; sb->s_op = &cifs_super_ops; sb->s_xattr = cifs_xattr_handlers; rc = super_setup_bdi(sb); diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index f88d2b10045a..f84978b76bb6 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h @@ -24,8 +24,6 @@ #include "../smbfs_common/smb2pdu.h" #include "smb2pdu.h" -#define CIFS_MAGIC_NUMBER 0xFF534D42 /* the first four bytes of SMB PDUs */ - #define SMB_PATH_MAX 260 #define CIFS_PORT 445 #define RFC1001_PORT 139 diff --git a/fs/cifs/smb1ops.c b/fs/cifs/smb1ops.c index 6364c09296e8..8272c91e15ef 100644 --- a/fs/cifs/smb1ops.c +++ b/fs/cifs/smb1ops.c @@ -7,6 +7,7 @@ #include #include +#include #include "cifsglob.h" #include "cifsproto.h" #include "cifs_debug.h" @@ -887,7 +888,7 @@ cifs_queryfs(const unsigned int xid, struct cifs_tcon *tcon, { int rc = -EOPNOTSUPP; - buf->f_type = CIFS_MAGIC_NUMBER; + buf->f_type = CIFS_SUPER_MAGIC; /* * We could add a second check for a QFS Unix capability bit diff --git a/fs/cifs/smb2glob.h b/fs/cifs/smb2glob.h index ca692b2283cd..4125fd113cfb 100644 --- a/fs/cifs/smb2glob.h +++ b/fs/cifs/smb2glob.h @@ -13,8 +13,6 @@ #ifndef _SMB2_GLOB_H #define _SMB2_GLOB_H -#define SMB2_MAGIC_NUMBER 0xFE534D42 - /* ***************************************************************** * Constants go here diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c index c2368c9110b0..af5d0830bc8a 100644 --- a/fs/cifs/smb2ops.c +++ b/fs/cifs/smb2ops.c @@ -13,6 +13,7 @@ #include #include #include +#include #include "cifsfs.h" #include "cifsglob.h" #include "smb2pdu.h" @@ -2757,7 +2758,7 @@ smb2_queryfs(const unsigned int xid, struct cifs_tcon *tcon, goto qfs_exit; rsp = (struct smb2_query_info_rsp *)rsp_iov.iov_base; - buf->f_type = SMB2_MAGIC_NUMBER; + buf->f_type = SMB2_SUPER_MAGIC; info = (struct smb2_fs_full_size_info *)( le16_to_cpu(rsp->OutputBufferOffset) + (char *)rsp); rc = smb2_validate_iov(le16_to_cpu(rsp->OutputBufferOffset), @@ -2799,7 +2800,7 @@ smb311_queryfs(const unsigned int xid, struct cifs_tcon *tcon, rc = SMB311_posix_qfs_info(xid, tcon, fid.persistent_fid, fid.volatile_fid, buf); - buf->f_type = SMB2_MAGIC_NUMBER; + buf->f_type = SMB2_SUPER_MAGIC; SMB2_close(xid, tcon, fid.persistent_fid, fid.volatile_fid); return rc; } diff --git a/include/uapi/linux/magic.h b/include/uapi/linux/magic.h index 35687dcb1a42..a9f4dcb7b457 100644 --- a/include/uapi/linux/magic.h +++ b/include/uapi/linux/magic.h @@ -51,6 +51,7 @@ #define QNX6_SUPER_MAGIC 0x68191122 /* qnx6 fs detection */ #define AFS_FS_MAGIC 0x6B414653 + #define REISERFS_SUPER_MAGIC 0x52654973 /* used by gcc */ /* used by file system utilities that look at the superblock, etc. */ @@ -59,6 +60,9 @@ #define REISER2FS_JR_SUPER_MAGIC_STRING "ReIsEr3Fs" #define SMB_SUPER_MAGIC 0x517B +#define CIFS_SUPER_MAGIC 0xFF534D42 /* the first four bytes of SMB PDUs */ +#define SMB2_SUPER_MAGIC 0xFE534D42 + #define CGROUP_SUPER_MAGIC 0x27e0eb #define CGROUP2_SUPER_MAGIC 0x63677270 From 9bbf8662a27b56358366027d1a77c0676f85b222 Mon Sep 17 00:00:00 2001 From: Eugene Korenevsky Date: Tue, 11 Jan 2022 11:36:50 +0300 Subject: [PATCH 471/493] cifs: fix FILE_BOTH_DIRECTORY_INFO definition The size of FILE_BOTH_DIRECTORY_INFO.ShortName must be 24 bytes, not 12 (see MS-FSCC documentation). Signed-off-by: Eugene Korenevsky Signed-off-by: Steve French --- fs/cifs/cifspdu.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/cifs/cifspdu.h b/fs/cifs/cifspdu.h index d2ff438fd31f..68b9a436af4b 100644 --- a/fs/cifs/cifspdu.h +++ b/fs/cifs/cifspdu.h @@ -2560,7 +2560,7 @@ typedef struct { __le32 EaSize; /* length of the xattrs */ __u8 ShortNameLength; __u8 Reserved; - __u8 ShortName[12]; + __u8 ShortName[24]; char FileName[1]; } __attribute__((packed)) FILE_BOTH_DIRECTORY_INFO; /* level 0x104 FFrsp data */ From d3e2bb4359f70c8b1d09a6f8e2f57240aab0da3f Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Fri, 14 Jan 2022 22:28:52 -0800 Subject: [PATCH 472/493] perf metric: Fix metric_leader Multiple events may have a metric_leader to aggregate into. This happens for uncore events where, for example, uncore_imc is expanded into uncore_imc_0, uncore_imc_1, etc. Such events all have the same metric_id and should aggregate into the first event. The change introducing metric_ids had a bug where the metric_id was compared to itself, creating an always true condition. Correct this by comparing the event in the metric_evlist and the metric_leader. Fixes: ec5c5b3d2c21b3f3 ("perf metric: Encode and use metric-id as qualifier") Signed-off-by: Ian Rogers Cc: Alexander Shishkin Cc: Andi Kleen Cc: Jiri Olsa Cc: John Garry Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lore.kernel.org/lkml/20220115062852.1959424-1-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/metricgroup.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/util/metricgroup.c b/tools/perf/util/metricgroup.c index 8826c555f780..d8492e339521 100644 --- a/tools/perf/util/metricgroup.c +++ b/tools/perf/util/metricgroup.c @@ -314,7 +314,7 @@ static int setup_metric_events(struct hashmap *ids, */ metric_id = evsel__metric_id(ev); evlist__for_each_entry_continue(metric_evlist, ev) { - if (!strcmp(evsel__metric_id(metric_events[i]), metric_id)) + if (!strcmp(evsel__metric_id(ev), metric_id)) ev->metric_leader = metric_events[i]; } } From 37be585807cb9a810f8395c39c4ee7bdbdc7b0dc Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Sat, 15 Jan 2022 17:11:10 -0300 Subject: [PATCH 473/493] perf cpumap: Add is_dummy() method Needed to check if a cpu_map is dummy, i.e. not a cpu map at all, for pid monitoring scenarios. This probably needs to move to libperf, but since perf itself is the first and so far only user, leave it at tools/perf/util/. Acked-by: Andi Kleen Acked-by: Ian Rogers Cc: Adrian Hunter Cc: Jiri Olsa Cc: Namhyung Kim Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/cpumap.h | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/tools/perf/util/cpumap.h b/tools/perf/util/cpumap.h index afc15027d678..0d3c2006a15d 100644 --- a/tools/perf/util/cpumap.h +++ b/tools/perf/util/cpumap.h @@ -2,6 +2,7 @@ #ifndef __PERF_CPUMAP_H #define __PERF_CPUMAP_H +#include #include #include #include @@ -50,6 +51,15 @@ int cpu__setup_cpunode_map(void); int cpu__max_node(void); struct perf_cpu cpu__max_cpu(void); struct perf_cpu cpu__max_present_cpu(void); + +/** + * cpu_map__is_dummy - Events associated with a pid, rather than a CPU, use a single dummy map with an entry of -1. + */ +static inline bool cpu_map__is_dummy(struct perf_cpu_map *cpus) +{ + return cpus->nr == 1 && cpus->map[0].cpu == -1; +} + /** * cpu__get_node - Returns the numa node X as read from * /sys/devices/system/node/nodeX for the given CPU. From 2eea0b56b0d6ace0172550477220a25d633ec5b9 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Sat, 15 Jan 2022 17:15:09 -0300 Subject: [PATCH 474/493] perf evlist: No need to do any affinity setup when profiling pids The cpumap is dummy, so no need to go on figuring out affinity.o This way we reduce the setup time for simple scenarios like: $ perf stat sleep 1 Acked-by: Andi Kleen Acked-by: Ian Rogers Cc: Adrian Hunter Cc: Jiri Olsa Cc: Namhyung Kim Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/evlist.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index 11eb95b2106b..6e88d404b5b3 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -1290,7 +1290,7 @@ void evlist__close(struct evlist *evlist) * With perf record core.cpus is usually NULL. * Use the old method to handle this for now. */ - if (!evlist->core.cpus) { + if (!evlist->core.cpus || cpu_map__is_dummy(evlist->core.cpus)) { evlist__for_each_entry_reverse(evlist, evsel) evsel__close(evsel); return; From 9bce13ea88f85344b765abe5d3dabdd0f44dc177 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Thu, 9 Dec 2021 21:04:25 +0100 Subject: [PATCH 475/493] perf record: Disable debuginfod by default Fedora 35 sets DEBUGINFOD_URLS by default, which might lead to unexpected stalls in perf record exit path, when we try to cache profiled binaries. # DEBUGINFOD_PROGRESS=1 ./perf record -a ^C[ perf record: Woken up 1 times to write data ] Downloading from https://debuginfod.fedoraproject.org/ 447069 Downloading from https://debuginfod.fedoraproject.org/ 1502175 Downloading \^Z Disabling DEBUGINFOD_URLS by default in perf record and adding debuginfod option and .perfconfig variable support to enable id. Default without debuginfo processing: # perf record -a Using system debuginfod setup: # perf record -a --debuginfod Using custom debuginfd url: # perf record -a --debuginfod='https://evenbetterdebuginfodserver.krava' Adding single perf_debuginfod_setup function and using it also in perf buildid-cache command. Signed-off-by: Jiri Olsa Cc: Alexander Shishkin Cc: Frank Ch. Eigler Cc: Ian Rogers Cc: Mark Rutland Cc: Michael Petlan Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lore.kernel.org/lkml/20211209200425.303561-1-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- .../perf/Documentation/perf-buildid-cache.txt | 5 +++- tools/perf/Documentation/perf-config.txt | 9 +++++++ tools/perf/Documentation/perf-record.txt | 9 +++++++ tools/perf/builtin-buildid-cache.c | 25 +++++++++++-------- tools/perf/builtin-record.c | 13 ++++++++++ tools/perf/util/util.c | 15 +++++++++++ tools/perf/util/util.h | 6 +++++ 7 files changed, 70 insertions(+), 12 deletions(-) diff --git a/tools/perf/Documentation/perf-buildid-cache.txt b/tools/perf/Documentation/perf-buildid-cache.txt index cd8ce6e8ec12..7e44b419d301 100644 --- a/tools/perf/Documentation/perf-buildid-cache.txt +++ b/tools/perf/Documentation/perf-buildid-cache.txt @@ -74,12 +74,15 @@ OPTIONS used when creating a uprobe for a process that resides in a different mount namespace from the perf(1) utility. ---debuginfod=URLs:: +--debuginfod[=URLs]:: Specify debuginfod URL to be used when retrieving perf.data binaries, it follows the same syntax as the DEBUGINFOD_URLS variable, like: buildid-cache.debuginfod=http://192.168.122.174:8002 + If the URLs is not specified, the value of DEBUGINFOD_URLS + system environment variable is used. + SEE ALSO -------- linkperf:perf-record[1], linkperf:perf-report[1], linkperf:perf-buildid-list[1] diff --git a/tools/perf/Documentation/perf-config.txt b/tools/perf/Documentation/perf-config.txt index 3bb75c1f25e8..0420e71698ee 100644 --- a/tools/perf/Documentation/perf-config.txt +++ b/tools/perf/Documentation/perf-config.txt @@ -587,6 +587,15 @@ record.*:: Use 'n' control blocks in asynchronous (Posix AIO) trace writing mode ('n' default: 1, max: 4). + record.debuginfod:: + Specify debuginfod URL to be used when cacheing perf.data binaries, + it follows the same syntax as the DEBUGINFOD_URLS variable, like: + + http://192.168.122.174:8002 + + If the URLs is 'system', the value of DEBUGINFOD_URLS system environment + variable is used. + diff.*:: diff.order:: This option sets the number of columns to sort the result. diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt index 55df7b073a55..9ccc75935bc5 100644 --- a/tools/perf/Documentation/perf-record.txt +++ b/tools/perf/Documentation/perf-record.txt @@ -715,6 +715,15 @@ measurements: include::intel-hybrid.txt[] +--debuginfod[=URLs]:: + Specify debuginfod URL to be used when cacheing perf.data binaries, + it follows the same syntax as the DEBUGINFOD_URLS variable, like: + + http://192.168.122.174:8002 + + If the URLs is not specified, the value of DEBUGINFOD_URLS + system environment variable is used. + SEE ALSO -------- linkperf:perf-stat[1], linkperf:perf-list[1], linkperf:perf-intel-pt[1] diff --git a/tools/perf/builtin-buildid-cache.c b/tools/perf/builtin-buildid-cache.c index 0db3cfc04c47..cd381693658b 100644 --- a/tools/perf/builtin-buildid-cache.c +++ b/tools/perf/builtin-buildid-cache.c @@ -351,10 +351,14 @@ static int build_id_cache__show_all(void) static int perf_buildid_cache_config(const char *var, const char *value, void *cb) { - const char **debuginfod = cb; + struct perf_debuginfod *di = cb; - if (!strcmp(var, "buildid-cache.debuginfod")) - *debuginfod = strdup(value); + if (!strcmp(var, "buildid-cache.debuginfod")) { + di->urls = strdup(value); + if (!di->urls) + return -ENOMEM; + di->set = true; + } return 0; } @@ -373,8 +377,8 @@ int cmd_buildid_cache(int argc, const char **argv) *purge_name_list_str = NULL, *missing_filename = NULL, *update_name_list_str = NULL, - *kcore_filename = NULL, - *debuginfod = NULL; + *kcore_filename = NULL; + struct perf_debuginfod debuginfod = { }; char sbuf[STRERR_BUFSIZE]; struct perf_data data = { @@ -399,8 +403,10 @@ int cmd_buildid_cache(int argc, const char **argv) OPT_BOOLEAN('f', "force", &force, "don't complain, do it"), OPT_STRING('u', "update", &update_name_list_str, "file list", "file(s) to update"), - OPT_STRING(0, "debuginfod", &debuginfod, "debuginfod url", - "set debuginfod url"), + OPT_STRING_OPTARG_SET(0, "debuginfod", &debuginfod.urls, + &debuginfod.set, "debuginfod urls", + "Enable debuginfod data retrieval from DEBUGINFOD_URLS or specified urls", + "system"), OPT_INCR('v', "verbose", &verbose, "be more verbose"), OPT_INTEGER(0, "target-ns", &ns_id, "target pid for namespace context"), OPT_END() @@ -425,10 +431,7 @@ int cmd_buildid_cache(int argc, const char **argv) if (argc || !(list_files || opts_flag)) usage_with_options(buildid_cache_usage, buildid_cache_options); - if (debuginfod) { - pr_debug("DEBUGINFOD_URLS=%s\n", debuginfod); - setenv("DEBUGINFOD_URLS", debuginfod, 1); - } + perf_debuginfod_setup(&debuginfod); /* -l is exclusive. It can not be used with other options. */ if (list_files && opts_flag) { diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 0a63295d30f0..bb716c953d02 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -111,6 +111,7 @@ struct record { unsigned long long samples; struct mmap_cpu_mask affinity_mask; unsigned long output_max_size; /* = 0: unlimited */ + struct perf_debuginfod debuginfod; }; static volatile int done; @@ -2177,6 +2178,12 @@ static int perf_record_config(const char *var, const char *value, void *cb) rec->opts.nr_cblocks = nr_cblocks_default; } #endif + if (!strcmp(var, "record.debuginfod")) { + rec->debuginfod.urls = strdup(value); + if (!rec->debuginfod.urls) + return -ENOMEM; + rec->debuginfod.set = true; + } return 0; } @@ -2667,6 +2674,10 @@ static struct option __record_options[] = { parse_control_option), OPT_CALLBACK(0, "synth", &record.opts, "no|all|task|mmap|cgroup", "Fine-tune event synthesis: default=all", parse_record_synth_option), + OPT_STRING_OPTARG_SET(0, "debuginfod", &record.debuginfod.urls, + &record.debuginfod.set, "debuginfod urls", + "Enable debuginfod data retrieval from DEBUGINFOD_URLS or specified urls", + "system"), OPT_END() }; @@ -2720,6 +2731,8 @@ int cmd_record(int argc, const char **argv) if (err) return err; + perf_debuginfod_setup(&record.debuginfod); + /* Make system wide (-a) the default target. */ if (!argc && target__none(&rec->opts.target)) rec->opts.target.system_wide = true; diff --git a/tools/perf/util/util.c b/tools/perf/util/util.c index df3c4671be72..fb4f6616b5fa 100644 --- a/tools/perf/util/util.c +++ b/tools/perf/util/util.c @@ -416,3 +416,18 @@ char *perf_exe(char *buf, int len) } return strcpy(buf, "perf"); } + +void perf_debuginfod_setup(struct perf_debuginfod *di) +{ + /* + * By default '!di->set' we clear DEBUGINFOD_URLS, so debuginfod + * processing is not triggered, otherwise we set it to 'di->urls' + * value. If 'di->urls' is "system" we keep DEBUGINFOD_URLS value. + */ + if (!di->set) + setenv("DEBUGINFOD_URLS", "", 1); + else if (di->urls && strcmp(di->urls, "system")) + setenv("DEBUGINFOD_URLS", di->urls, 1); + + pr_debug("DEBUGINFOD_URLS=%s\n", getenv("DEBUGINFOD_URLS")); +} diff --git a/tools/perf/util/util.h b/tools/perf/util/util.h index 9443c29afa52..7b625cbd2dd8 100644 --- a/tools/perf/util/util.h +++ b/tools/perf/util/util.h @@ -71,4 +71,10 @@ void test_attr__init(void); struct perf_event_attr; void test_attr__open(struct perf_event_attr *attr, pid_t pid, struct perf_cpu cpu, int fd, int group_fd, unsigned long flags); + +struct perf_debuginfod { + const char *urls; + bool set; +}; +void perf_debuginfod_setup(struct perf_debuginfod *di); #endif /* GIT_COMPAT_UTIL_H */ From a6097180d884ddab769fb25588ea8598589c218c Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Mon, 17 Jan 2022 09:07:26 +1100 Subject: [PATCH 476/493] devtmpfs regression fix: reconfigure on each mount Prior to Linux v5.4 devtmpfs used mount_single() which treats the given mount options as "remount" options, so it updates the configuration of the single super_block on each mount. Since that was changed, the mount options used for devtmpfs are ignored. This is a regression which affect systemd - which mounts devtmpfs with "-o mode=755,size=4m,nr_inodes=1m". This patch restores the "remount" effect by calling reconfigure_single() Fixes: d401727ea0d7 ("devtmpfs: don't mix {ramfs,shmem}_fill_super() with mount_single()") Acked-by: Christian Brauner Cc: Al Viro Signed-off-by: NeilBrown Signed-off-by: Linus Torvalds --- drivers/base/devtmpfs.c | 7 +++++++ fs/super.c | 4 ++-- include/linux/fs_context.h | 2 ++ 3 files changed, 11 insertions(+), 2 deletions(-) diff --git a/drivers/base/devtmpfs.c b/drivers/base/devtmpfs.c index 1e2c2d3882e2..f41063ac1aee 100644 --- a/drivers/base/devtmpfs.c +++ b/drivers/base/devtmpfs.c @@ -65,8 +65,15 @@ static struct dentry *public_dev_mount(struct file_system_type *fs_type, int fla const char *dev_name, void *data) { struct super_block *s = mnt->mnt_sb; + int err; + atomic_inc(&s->s_active); down_write(&s->s_umount); + err = reconfigure_single(s, flags, data); + if (err < 0) { + deactivate_locked_super(s); + return ERR_PTR(err); + } return dget(s->s_root); } diff --git a/fs/super.c b/fs/super.c index 3bfc0f8fbd5b..a6405d44d4ca 100644 --- a/fs/super.c +++ b/fs/super.c @@ -1423,8 +1423,8 @@ struct dentry *mount_nodev(struct file_system_type *fs_type, } EXPORT_SYMBOL(mount_nodev); -static int reconfigure_single(struct super_block *s, - int flags, void *data) +int reconfigure_single(struct super_block *s, + int flags, void *data) { struct fs_context *fc; int ret; diff --git a/include/linux/fs_context.h b/include/linux/fs_context.h index 6b54982fc5f3..13fa6f3df8e4 100644 --- a/include/linux/fs_context.h +++ b/include/linux/fs_context.h @@ -142,6 +142,8 @@ extern void put_fs_context(struct fs_context *fc); extern int vfs_parse_fs_param_source(struct fs_context *fc, struct fs_parameter *param); extern void fc_drop_locked(struct fs_context *fc); +int reconfigure_single(struct super_block *s, + int flags, void *data); /* * sget() wrappers to be called from the ->get_tree() op. From 722d94847de29310e8aa03fcbdb41fc92c521756 Mon Sep 17 00:00:00 2001 From: Jamie Hill-Daniel Date: Tue, 18 Jan 2022 08:06:04 +0100 Subject: [PATCH 477/493] vfs: fs_context: fix up param length parsing in legacy_parse_param The "PAGE_SIZE - 2 - size" calculation in legacy_parse_param() is an unsigned type so a large value of "size" results in a high positive value instead of a negative value as expected. Fix this by getting rid of the subtraction. Signed-off-by: Jamie Hill-Daniel Signed-off-by: William Liu Tested-by: Salvatore Bonaccorso Tested-by: Thadeu Lima de Souza Cascardo Acked-by: Dan Carpenter Acked-by: Al Viro Signed-off-by: Greg Kroah-Hartman Signed-off-by: Linus Torvalds --- fs/fs_context.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/fs_context.c b/fs/fs_context.c index b7e43a780a62..24ce12f0db32 100644 --- a/fs/fs_context.c +++ b/fs/fs_context.c @@ -548,7 +548,7 @@ static int legacy_parse_param(struct fs_context *fc, struct fs_parameter *param) param->key); } - if (len > PAGE_SIZE - 2 - size) + if (size + len + 2 > PAGE_SIZE) return invalf(fc, "VFS: Legacy: Cumulative options too large"); if (strchr(param->key, ',') || (param->type == fs_value_is_string && From e56e18985596617ae426ed5997fb2e737cffb58b Mon Sep 17 00:00:00 2001 From: "Justin M. Forbes" Date: Wed, 12 Jan 2022 08:01:38 -0600 Subject: [PATCH 478/493] lib/crypto: add prompts back to crypto libraries Commit 6048fdcc5f269 ("lib/crypto: blake2s: include as built-in") took away a number of prompt texts from other crypto libraries. This makes values flip from built-in to module when oldconfig runs, and causes problems when these crypto libs need to be built in for thingslike BIG_KEYS. Fixes: 6048fdcc5f269 ("lib/crypto: blake2s: include as built-in") Cc: Herbert Xu Cc: linux-crypto@vger.kernel.org Signed-off-by: Justin M. Forbes [Jason: - moved menu into submenu of lib/ instead of root menu - fixed chacha sub-dependencies for CONFIG_CRYPTO] Signed-off-by: Jason A. Donenfeld --- crypto/Kconfig | 2 -- lib/Kconfig | 2 ++ lib/crypto/Kconfig | 17 ++++++++++++----- 3 files changed, 14 insertions(+), 7 deletions(-) diff --git a/crypto/Kconfig b/crypto/Kconfig index 94bfa32cc6a1..442765219c37 100644 --- a/crypto/Kconfig +++ b/crypto/Kconfig @@ -1928,5 +1928,3 @@ source "crypto/asymmetric_keys/Kconfig" source "certs/Kconfig" endif # if CRYPTO - -source "lib/crypto/Kconfig" diff --git a/lib/Kconfig b/lib/Kconfig index 655b0e43f260..c20b68ad2bc3 100644 --- a/lib/Kconfig +++ b/lib/Kconfig @@ -122,6 +122,8 @@ config INDIRECT_IOMEM_FALLBACK mmio accesses when the IO memory address is not a registered emulated region. +source "lib/crypto/Kconfig" + config CRC_CCITT tristate "CRC-CCITT functions" help diff --git a/lib/crypto/Kconfig b/lib/crypto/Kconfig index 8620f38e117c..e8e525650cf2 100644 --- a/lib/crypto/Kconfig +++ b/lib/crypto/Kconfig @@ -1,5 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 +menu "Crypto library routines" + config CRYPTO_LIB_AES tristate @@ -31,7 +33,7 @@ config CRYPTO_ARCH_HAVE_LIB_CHACHA config CRYPTO_LIB_CHACHA_GENERIC tristate - select CRYPTO_ALGAPI + select XOR_BLOCKS help This symbol can be depended upon by arch implementations of the ChaCha library interface that require the generic code as a @@ -40,7 +42,8 @@ config CRYPTO_LIB_CHACHA_GENERIC of CRYPTO_LIB_CHACHA. config CRYPTO_LIB_CHACHA - tristate + tristate "ChaCha library interface" + depends on CRYPTO depends on CRYPTO_ARCH_HAVE_LIB_CHACHA || !CRYPTO_ARCH_HAVE_LIB_CHACHA select CRYPTO_LIB_CHACHA_GENERIC if CRYPTO_ARCH_HAVE_LIB_CHACHA=n help @@ -65,7 +68,7 @@ config CRYPTO_LIB_CURVE25519_GENERIC of CRYPTO_LIB_CURVE25519. config CRYPTO_LIB_CURVE25519 - tristate + tristate "Curve25519 scalar multiplication library" depends on CRYPTO_ARCH_HAVE_LIB_CURVE25519 || !CRYPTO_ARCH_HAVE_LIB_CURVE25519 select CRYPTO_LIB_CURVE25519_GENERIC if CRYPTO_ARCH_HAVE_LIB_CURVE25519=n help @@ -100,7 +103,7 @@ config CRYPTO_LIB_POLY1305_GENERIC of CRYPTO_LIB_POLY1305. config CRYPTO_LIB_POLY1305 - tristate + tristate "Poly1305 library interface" depends on CRYPTO_ARCH_HAVE_LIB_POLY1305 || !CRYPTO_ARCH_HAVE_LIB_POLY1305 select CRYPTO_LIB_POLY1305_GENERIC if CRYPTO_ARCH_HAVE_LIB_POLY1305=n help @@ -109,14 +112,18 @@ config CRYPTO_LIB_POLY1305 is available and enabled. config CRYPTO_LIB_CHACHA20POLY1305 - tristate + tristate "ChaCha20-Poly1305 AEAD support (8-byte nonce library version)" depends on CRYPTO_ARCH_HAVE_LIB_CHACHA || !CRYPTO_ARCH_HAVE_LIB_CHACHA depends on CRYPTO_ARCH_HAVE_LIB_POLY1305 || !CRYPTO_ARCH_HAVE_LIB_POLY1305 + depends on CRYPTO select CRYPTO_LIB_CHACHA select CRYPTO_LIB_POLY1305 + select CRYPTO_ALGAPI config CRYPTO_LIB_SHA256 tristate config CRYPTO_LIB_SM4 tristate + +endmenu From d8d83d8ab0a453e17e68b3a3bed1f940c34b8646 Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Tue, 11 Jan 2022 14:37:41 +0100 Subject: [PATCH 479/493] lib/crypto: blake2s: move hmac construction into wireguard Basically nobody should use blake2s in an HMAC construction; it already has a keyed variant. But unfortunately for historical reasons, Noise, used by WireGuard, uses HKDF quite strictly, which means we have to use this. Because this really shouldn't be used by others, this commit moves it into wireguard's noise.c locally, so that kernels that aren't using WireGuard don't get this superfluous code baked in. On m68k systems, this shaves off ~314 bytes. Cc: Herbert Xu Tested-by: Geert Uytterhoeven Acked-by: Ard Biesheuvel Signed-off-by: Jason A. Donenfeld --- drivers/net/wireguard/noise.c | 45 ++++++++++++++++++++++++++++++----- include/crypto/blake2s.h | 3 --- lib/crypto/blake2s-selftest.c | 31 ------------------------ lib/crypto/blake2s.c | 37 ---------------------------- 4 files changed, 39 insertions(+), 77 deletions(-) diff --git a/drivers/net/wireguard/noise.c b/drivers/net/wireguard/noise.c index c0cfd9b36c0b..720952b92e78 100644 --- a/drivers/net/wireguard/noise.c +++ b/drivers/net/wireguard/noise.c @@ -302,6 +302,41 @@ void wg_noise_set_static_identity_private_key( static_identity->static_public, private_key); } +static void hmac(u8 *out, const u8 *in, const u8 *key, const size_t inlen, const size_t keylen) +{ + struct blake2s_state state; + u8 x_key[BLAKE2S_BLOCK_SIZE] __aligned(__alignof__(u32)) = { 0 }; + u8 i_hash[BLAKE2S_HASH_SIZE] __aligned(__alignof__(u32)); + int i; + + if (keylen > BLAKE2S_BLOCK_SIZE) { + blake2s_init(&state, BLAKE2S_HASH_SIZE); + blake2s_update(&state, key, keylen); + blake2s_final(&state, x_key); + } else + memcpy(x_key, key, keylen); + + for (i = 0; i < BLAKE2S_BLOCK_SIZE; ++i) + x_key[i] ^= 0x36; + + blake2s_init(&state, BLAKE2S_HASH_SIZE); + blake2s_update(&state, x_key, BLAKE2S_BLOCK_SIZE); + blake2s_update(&state, in, inlen); + blake2s_final(&state, i_hash); + + for (i = 0; i < BLAKE2S_BLOCK_SIZE; ++i) + x_key[i] ^= 0x5c ^ 0x36; + + blake2s_init(&state, BLAKE2S_HASH_SIZE); + blake2s_update(&state, x_key, BLAKE2S_BLOCK_SIZE); + blake2s_update(&state, i_hash, BLAKE2S_HASH_SIZE); + blake2s_final(&state, i_hash); + + memcpy(out, i_hash, BLAKE2S_HASH_SIZE); + memzero_explicit(x_key, BLAKE2S_BLOCK_SIZE); + memzero_explicit(i_hash, BLAKE2S_HASH_SIZE); +} + /* This is Hugo Krawczyk's HKDF: * - https://eprint.iacr.org/2010/264.pdf * - https://tools.ietf.org/html/rfc5869 @@ -322,14 +357,14 @@ static void kdf(u8 *first_dst, u8 *second_dst, u8 *third_dst, const u8 *data, ((third_len || third_dst) && (!second_len || !second_dst)))); /* Extract entropy from data into secret */ - blake2s256_hmac(secret, data, chaining_key, data_len, NOISE_HASH_LEN); + hmac(secret, data, chaining_key, data_len, NOISE_HASH_LEN); if (!first_dst || !first_len) goto out; /* Expand first key: key = secret, data = 0x1 */ output[0] = 1; - blake2s256_hmac(output, output, secret, 1, BLAKE2S_HASH_SIZE); + hmac(output, output, secret, 1, BLAKE2S_HASH_SIZE); memcpy(first_dst, output, first_len); if (!second_dst || !second_len) @@ -337,8 +372,7 @@ static void kdf(u8 *first_dst, u8 *second_dst, u8 *third_dst, const u8 *data, /* Expand second key: key = secret, data = first-key || 0x2 */ output[BLAKE2S_HASH_SIZE] = 2; - blake2s256_hmac(output, output, secret, BLAKE2S_HASH_SIZE + 1, - BLAKE2S_HASH_SIZE); + hmac(output, output, secret, BLAKE2S_HASH_SIZE + 1, BLAKE2S_HASH_SIZE); memcpy(second_dst, output, second_len); if (!third_dst || !third_len) @@ -346,8 +380,7 @@ static void kdf(u8 *first_dst, u8 *second_dst, u8 *third_dst, const u8 *data, /* Expand third key: key = secret, data = second-key || 0x3 */ output[BLAKE2S_HASH_SIZE] = 3; - blake2s256_hmac(output, output, secret, BLAKE2S_HASH_SIZE + 1, - BLAKE2S_HASH_SIZE); + hmac(output, output, secret, BLAKE2S_HASH_SIZE + 1, BLAKE2S_HASH_SIZE); memcpy(third_dst, output, third_len); out: diff --git a/include/crypto/blake2s.h b/include/crypto/blake2s.h index df3c6c2f9553..f9ffd39194eb 100644 --- a/include/crypto/blake2s.h +++ b/include/crypto/blake2s.h @@ -101,7 +101,4 @@ static inline void blake2s(u8 *out, const u8 *in, const u8 *key, blake2s_final(&state, out); } -void blake2s256_hmac(u8 *out, const u8 *in, const u8 *key, const size_t inlen, - const size_t keylen); - #endif /* _CRYPTO_BLAKE2S_H */ diff --git a/lib/crypto/blake2s-selftest.c b/lib/crypto/blake2s-selftest.c index 5d9ea53be973..409e4b728770 100644 --- a/lib/crypto/blake2s-selftest.c +++ b/lib/crypto/blake2s-selftest.c @@ -15,7 +15,6 @@ * #include * * #include - * #include * * #define BLAKE2S_TESTVEC_COUNT 256 * @@ -58,16 +57,6 @@ * } * printf("};\n\n"); * - * printf("static const u8 blake2s_hmac_testvecs[][BLAKE2S_HASH_SIZE] __initconst = {\n"); - * - * HMAC(EVP_blake2s256(), key, sizeof(key), buf, sizeof(buf), hash, NULL); - * print_vec(hash, BLAKE2S_OUTBYTES); - * - * HMAC(EVP_blake2s256(), buf, sizeof(buf), key, sizeof(key), hash, NULL); - * print_vec(hash, BLAKE2S_OUTBYTES); - * - * printf("};\n"); - * * return 0; *} */ @@ -554,15 +543,6 @@ static const u8 blake2s_testvecs[][BLAKE2S_HASH_SIZE] __initconst = { 0xd6, 0x98, 0x6b, 0x07, 0x10, 0x65, 0x52, 0x65, }, }; -static const u8 blake2s_hmac_testvecs[][BLAKE2S_HASH_SIZE] __initconst = { - { 0xce, 0xe1, 0x57, 0x69, 0x82, 0xdc, 0xbf, 0x43, 0xad, 0x56, 0x4c, 0x70, - 0xed, 0x68, 0x16, 0x96, 0xcf, 0xa4, 0x73, 0xe8, 0xe8, 0xfc, 0x32, 0x79, - 0x08, 0x0a, 0x75, 0x82, 0xda, 0x3f, 0x05, 0x11, }, - { 0x77, 0x2f, 0x0c, 0x71, 0x41, 0xf4, 0x4b, 0x2b, 0xb3, 0xc6, 0xb6, 0xf9, - 0x60, 0xde, 0xe4, 0x52, 0x38, 0x66, 0xe8, 0xbf, 0x9b, 0x96, 0xc4, 0x9f, - 0x60, 0xd9, 0x24, 0x37, 0x99, 0xd6, 0xec, 0x31, }, -}; - bool __init blake2s_selftest(void) { u8 key[BLAKE2S_KEY_SIZE]; @@ -607,16 +587,5 @@ bool __init blake2s_selftest(void) } } - if (success) { - blake2s256_hmac(hash, buf, key, sizeof(buf), sizeof(key)); - success &= !memcmp(hash, blake2s_hmac_testvecs[0], BLAKE2S_HASH_SIZE); - - blake2s256_hmac(hash, key, buf, sizeof(key), sizeof(buf)); - success &= !memcmp(hash, blake2s_hmac_testvecs[1], BLAKE2S_HASH_SIZE); - - if (!success) - pr_err("blake2s256_hmac self-test: FAIL\n"); - } - return success; } diff --git a/lib/crypto/blake2s.c b/lib/crypto/blake2s.c index 93f2ae051370..9364f79937b8 100644 --- a/lib/crypto/blake2s.c +++ b/lib/crypto/blake2s.c @@ -30,43 +30,6 @@ void blake2s_final(struct blake2s_state *state, u8 *out) } EXPORT_SYMBOL(blake2s_final); -void blake2s256_hmac(u8 *out, const u8 *in, const u8 *key, const size_t inlen, - const size_t keylen) -{ - struct blake2s_state state; - u8 x_key[BLAKE2S_BLOCK_SIZE] __aligned(__alignof__(u32)) = { 0 }; - u8 i_hash[BLAKE2S_HASH_SIZE] __aligned(__alignof__(u32)); - int i; - - if (keylen > BLAKE2S_BLOCK_SIZE) { - blake2s_init(&state, BLAKE2S_HASH_SIZE); - blake2s_update(&state, key, keylen); - blake2s_final(&state, x_key); - } else - memcpy(x_key, key, keylen); - - for (i = 0; i < BLAKE2S_BLOCK_SIZE; ++i) - x_key[i] ^= 0x36; - - blake2s_init(&state, BLAKE2S_HASH_SIZE); - blake2s_update(&state, x_key, BLAKE2S_BLOCK_SIZE); - blake2s_update(&state, in, inlen); - blake2s_final(&state, i_hash); - - for (i = 0; i < BLAKE2S_BLOCK_SIZE; ++i) - x_key[i] ^= 0x5c ^ 0x36; - - blake2s_init(&state, BLAKE2S_HASH_SIZE); - blake2s_update(&state, x_key, BLAKE2S_BLOCK_SIZE); - blake2s_update(&state, i_hash, BLAKE2S_HASH_SIZE); - blake2s_final(&state, i_hash); - - memcpy(out, i_hash, BLAKE2S_HASH_SIZE); - memzero_explicit(x_key, BLAKE2S_BLOCK_SIZE); - memzero_explicit(i_hash, BLAKE2S_HASH_SIZE); -} -EXPORT_SYMBOL(blake2s256_hmac); - static int __init blake2s_mod_init(void) { if (!IS_ENABLED(CONFIG_CRYPTO_MANAGER_DISABLE_TESTS) && From 9a1536b093bb5bf60689021275fd24d513bb8db0 Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Tue, 11 Jan 2022 18:58:43 +0100 Subject: [PATCH 480/493] lib/crypto: sha1: re-roll loops to reduce code size With SHA-1 no longer being used for anything performance oriented, and also soon to be phased out entirely, we can make up for the space added by unrolled BLAKE2s by simply re-rolling SHA-1. Since SHA-1 is so much more complex, re-rolling it more or less takes care of the code size added by BLAKE2s. And eventually, hopefully we'll see SHA-1 removed entirely from most small kernel builds. Cc: Herbert Xu Cc: Ard Biesheuvel Tested-by: Geert Uytterhoeven Signed-off-by: Jason A. Donenfeld --- lib/sha1.c | 95 ++++++++---------------------------------------------- 1 file changed, 14 insertions(+), 81 deletions(-) diff --git a/lib/sha1.c b/lib/sha1.c index 9bd1935a1472..0494766fc574 100644 --- a/lib/sha1.c +++ b/lib/sha1.c @@ -9,6 +9,7 @@ #include #include #include +#include #include #include @@ -55,7 +56,8 @@ #define SHA_ROUND(t, input, fn, constant, A, B, C, D, E) do { \ __u32 TEMP = input(t); setW(t, TEMP); \ E += TEMP + rol32(A,5) + (fn) + (constant); \ - B = ror32(B, 2); } while (0) + B = ror32(B, 2); \ + TEMP = E; E = D; D = C; C = B; B = A; A = TEMP; } while (0) #define T_0_15(t, A, B, C, D, E) SHA_ROUND(t, SHA_SRC, (((C^D)&B)^D) , 0x5a827999, A, B, C, D, E ) #define T_16_19(t, A, B, C, D, E) SHA_ROUND(t, SHA_MIX, (((C^D)&B)^D) , 0x5a827999, A, B, C, D, E ) @@ -84,6 +86,7 @@ void sha1_transform(__u32 *digest, const char *data, __u32 *array) { __u32 A, B, C, D, E; + unsigned int i = 0; A = digest[0]; B = digest[1]; @@ -92,94 +95,24 @@ void sha1_transform(__u32 *digest, const char *data, __u32 *array) E = digest[4]; /* Round 1 - iterations 0-16 take their input from 'data' */ - T_0_15( 0, A, B, C, D, E); - T_0_15( 1, E, A, B, C, D); - T_0_15( 2, D, E, A, B, C); - T_0_15( 3, C, D, E, A, B); - T_0_15( 4, B, C, D, E, A); - T_0_15( 5, A, B, C, D, E); - T_0_15( 6, E, A, B, C, D); - T_0_15( 7, D, E, A, B, C); - T_0_15( 8, C, D, E, A, B); - T_0_15( 9, B, C, D, E, A); - T_0_15(10, A, B, C, D, E); - T_0_15(11, E, A, B, C, D); - T_0_15(12, D, E, A, B, C); - T_0_15(13, C, D, E, A, B); - T_0_15(14, B, C, D, E, A); - T_0_15(15, A, B, C, D, E); + for (; i < 16; ++i) + T_0_15(i, A, B, C, D, E); /* Round 1 - tail. Input from 512-bit mixing array */ - T_16_19(16, E, A, B, C, D); - T_16_19(17, D, E, A, B, C); - T_16_19(18, C, D, E, A, B); - T_16_19(19, B, C, D, E, A); + for (; i < 20; ++i) + T_16_19(i, A, B, C, D, E); /* Round 2 */ - T_20_39(20, A, B, C, D, E); - T_20_39(21, E, A, B, C, D); - T_20_39(22, D, E, A, B, C); - T_20_39(23, C, D, E, A, B); - T_20_39(24, B, C, D, E, A); - T_20_39(25, A, B, C, D, E); - T_20_39(26, E, A, B, C, D); - T_20_39(27, D, E, A, B, C); - T_20_39(28, C, D, E, A, B); - T_20_39(29, B, C, D, E, A); - T_20_39(30, A, B, C, D, E); - T_20_39(31, E, A, B, C, D); - T_20_39(32, D, E, A, B, C); - T_20_39(33, C, D, E, A, B); - T_20_39(34, B, C, D, E, A); - T_20_39(35, A, B, C, D, E); - T_20_39(36, E, A, B, C, D); - T_20_39(37, D, E, A, B, C); - T_20_39(38, C, D, E, A, B); - T_20_39(39, B, C, D, E, A); + for (; i < 40; ++i) + T_20_39(i, A, B, C, D, E); /* Round 3 */ - T_40_59(40, A, B, C, D, E); - T_40_59(41, E, A, B, C, D); - T_40_59(42, D, E, A, B, C); - T_40_59(43, C, D, E, A, B); - T_40_59(44, B, C, D, E, A); - T_40_59(45, A, B, C, D, E); - T_40_59(46, E, A, B, C, D); - T_40_59(47, D, E, A, B, C); - T_40_59(48, C, D, E, A, B); - T_40_59(49, B, C, D, E, A); - T_40_59(50, A, B, C, D, E); - T_40_59(51, E, A, B, C, D); - T_40_59(52, D, E, A, B, C); - T_40_59(53, C, D, E, A, B); - T_40_59(54, B, C, D, E, A); - T_40_59(55, A, B, C, D, E); - T_40_59(56, E, A, B, C, D); - T_40_59(57, D, E, A, B, C); - T_40_59(58, C, D, E, A, B); - T_40_59(59, B, C, D, E, A); + for (; i < 60; ++i) + T_40_59(i, A, B, C, D, E); /* Round 4 */ - T_60_79(60, A, B, C, D, E); - T_60_79(61, E, A, B, C, D); - T_60_79(62, D, E, A, B, C); - T_60_79(63, C, D, E, A, B); - T_60_79(64, B, C, D, E, A); - T_60_79(65, A, B, C, D, E); - T_60_79(66, E, A, B, C, D); - T_60_79(67, D, E, A, B, C); - T_60_79(68, C, D, E, A, B); - T_60_79(69, B, C, D, E, A); - T_60_79(70, A, B, C, D, E); - T_60_79(71, E, A, B, C, D); - T_60_79(72, D, E, A, B, C); - T_60_79(73, C, D, E, A, B); - T_60_79(74, B, C, D, E, A); - T_60_79(75, A, B, C, D, E); - T_60_79(76, E, A, B, C, D); - T_60_79(77, D, E, A, B, C); - T_60_79(78, C, D, E, A, B); - T_60_79(79, B, C, D, E, A); + for (; i < 80; ++i) + T_60_79(i, A, B, C, D, E); digest[0] += A; digest[1] += B; From c0a8a61e7abbf66729687ee63659ee25983fbb1e Mon Sep 17 00:00:00 2001 From: Schspa Shi Date: Fri, 14 Jan 2022 16:12:16 +0800 Subject: [PATCH 481/493] random: fix typo in comments s/or/for Signed-off-by: Schspa Shi Signed-off-by: Jason A. Donenfeld --- drivers/char/random.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/char/random.c b/drivers/char/random.c index 227fb7802738..ba8d63f52c05 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -101,7 +101,7 @@ * =============================== * * There are four exported interfaces; two for use within the kernel, - * and two or use from userspace. + * and two for use from userspace. * * Exported interfaces ---- userspace output * ----------------------------------------- From 91ec0fe138f107232cb36bc6112211db37cb5306 Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Sun, 9 Jan 2022 17:32:02 +0100 Subject: [PATCH 482/493] random: cleanup poolinfo abstraction Now that we're only using one polynomial, we can cleanup its representation into constants, instead of passing around pointers dynamically to select different polynomials. This improves the codegen and makes the code a bit more straightforward. Reviewed-by: Dominik Brodowski Signed-off-by: Jason A. Donenfeld --- drivers/char/random.c | 67 +++++++++++++++++++------------------------ 1 file changed, 30 insertions(+), 37 deletions(-) diff --git a/drivers/char/random.c b/drivers/char/random.c index ba8d63f52c05..349beb318757 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -430,14 +430,20 @@ static int random_write_wakeup_bits = 28 * OUTPUT_POOL_WORDS; * polynomial which improves the resulting TGFSR polynomial to be * irreducible, which we have made here. */ -static const struct poolinfo { - int poolbitshift, poolwords, poolbytes, poolfracbits; -#define S(x) ilog2(x)+5, (x), (x)*4, (x) << (ENTROPY_SHIFT+5) - int tap1, tap2, tap3, tap4, tap5; -} poolinfo_table[] = { - /* was: x^128 + x^103 + x^76 + x^51 +x^25 + x + 1 */ +enum poolinfo { + POOL_WORDS = 128, + POOL_WORDMASK = POOL_WORDS - 1, + POOL_BYTES = POOL_WORDS * sizeof(u32), + POOL_BITS = POOL_BYTES * 8, + POOL_BITSHIFT = ilog2(POOL_WORDS) + 5, + POOL_FRACBITS = POOL_WORDS << (ENTROPY_SHIFT + 5), + /* x^128 + x^104 + x^76 + x^51 +x^25 + x + 1 */ - { S(128), 104, 76, 51, 25, 1 }, + POOL_TAP1 = 104, + POOL_TAP2 = 76, + POOL_TAP3 = 51, + POOL_TAP4 = 25, + POOL_TAP5 = 1 }; /* @@ -503,7 +509,6 @@ MODULE_PARM_DESC(ratelimit_disable, "Disable random ratelimit suppression"); struct entropy_store; struct entropy_store { /* read-only data: */ - const struct poolinfo *poolinfo; __u32 *pool; const char *name; @@ -525,7 +530,6 @@ static void crng_reseed(struct crng_state *crng, struct entropy_store *r); static __u32 input_pool_data[INPUT_POOL_WORDS] __latent_entropy; static struct entropy_store input_pool = { - .poolinfo = &poolinfo_table[0], .name = "input", .lock = __SPIN_LOCK_UNLOCKED(input_pool.lock), .pool = input_pool_data @@ -548,33 +552,26 @@ static __u32 const twist_table[8] = { static void _mix_pool_bytes(struct entropy_store *r, const void *in, int nbytes) { - unsigned long i, tap1, tap2, tap3, tap4, tap5; + unsigned long i; int input_rotate; - int wordmask = r->poolinfo->poolwords - 1; const unsigned char *bytes = in; __u32 w; - tap1 = r->poolinfo->tap1; - tap2 = r->poolinfo->tap2; - tap3 = r->poolinfo->tap3; - tap4 = r->poolinfo->tap4; - tap5 = r->poolinfo->tap5; - input_rotate = r->input_rotate; i = r->add_ptr; /* mix one byte at a time to simplify size handling and churn faster */ while (nbytes--) { w = rol32(*bytes++, input_rotate); - i = (i - 1) & wordmask; + i = (i - 1) & POOL_WORDMASK; /* XOR in the various taps */ w ^= r->pool[i]; - w ^= r->pool[(i + tap1) & wordmask]; - w ^= r->pool[(i + tap2) & wordmask]; - w ^= r->pool[(i + tap3) & wordmask]; - w ^= r->pool[(i + tap4) & wordmask]; - w ^= r->pool[(i + tap5) & wordmask]; + w ^= r->pool[(i + POOL_TAP1) & POOL_WORDMASK]; + w ^= r->pool[(i + POOL_TAP2) & POOL_WORDMASK]; + w ^= r->pool[(i + POOL_TAP3) & POOL_WORDMASK]; + w ^= r->pool[(i + POOL_TAP4) & POOL_WORDMASK]; + w ^= r->pool[(i + POOL_TAP5) & POOL_WORDMASK]; /* Mix the result back in with a twist */ r->pool[i] = (w >> 3) ^ twist_table[w & 7]; @@ -672,7 +669,6 @@ static void process_random_ready_list(void) static void credit_entropy_bits(struct entropy_store *r, int nbits) { int entropy_count, orig; - const int pool_size = r->poolinfo->poolfracbits; int nfrac = nbits << ENTROPY_SHIFT; if (!nbits) @@ -706,25 +702,25 @@ retry: * turns no matter how large nbits is. */ int pnfrac = nfrac; - const int s = r->poolinfo->poolbitshift + ENTROPY_SHIFT + 2; + const int s = POOL_BITSHIFT + ENTROPY_SHIFT + 2; /* The +2 corresponds to the /4 in the denominator */ do { - unsigned int anfrac = min(pnfrac, pool_size/2); + unsigned int anfrac = min(pnfrac, POOL_FRACBITS/2); unsigned int add = - ((pool_size - entropy_count)*anfrac*3) >> s; + ((POOL_FRACBITS - entropy_count)*anfrac*3) >> s; entropy_count += add; pnfrac -= anfrac; - } while (unlikely(entropy_count < pool_size-2 && pnfrac)); + } while (unlikely(entropy_count < POOL_FRACBITS-2 && pnfrac)); } if (WARN_ON(entropy_count < 0)) { pr_warn("negative entropy/overflow: pool %s count %d\n", r->name, entropy_count); entropy_count = 0; - } else if (entropy_count > pool_size) - entropy_count = pool_size; + } else if (entropy_count > POOL_FRACBITS) + entropy_count = POOL_FRACBITS; if (cmpxchg(&r->entropy_count, orig, entropy_count) != orig) goto retry; @@ -741,13 +737,11 @@ retry: static int credit_entropy_bits_safe(struct entropy_store *r, int nbits) { - const int nbits_max = r->poolinfo->poolwords * 32; - if (nbits < 0) return -EINVAL; /* Cap the value to avoid overflows */ - nbits = min(nbits, nbits_max); + nbits = min(nbits, POOL_BITS); credit_entropy_bits(r, nbits); return 0; @@ -1343,7 +1337,7 @@ static size_t account(struct entropy_store *r, size_t nbytes, int min, int entropy_count, orig, have_bytes; size_t ibytes, nfrac; - BUG_ON(r->entropy_count > r->poolinfo->poolfracbits); + BUG_ON(r->entropy_count > POOL_FRACBITS); /* Can we pull enough? */ retry: @@ -1409,8 +1403,7 @@ static void extract_buf(struct entropy_store *r, __u8 *out) /* Generate a hash across the pool */ spin_lock_irqsave(&r->lock, flags); - blake2s_update(&state, (const u8 *)r->pool, - r->poolinfo->poolwords * sizeof(*r->pool)); + blake2s_update(&state, (const u8 *)r->pool, POOL_BYTES); blake2s_final(&state, hash); /* final zeros out state */ /* @@ -1766,7 +1759,7 @@ static void __init init_std_data(struct entropy_store *r) unsigned long rv; mix_pool_bytes(r, &now, sizeof(now)); - for (i = r->poolinfo->poolbytes; i > 0; i -= sizeof(rv)) { + for (i = POOL_BYTES; i > 0; i -= sizeof(rv)) { if (!arch_get_random_seed_long(&rv) && !arch_get_random_long(&rv)) rv = random_get_entropy(); From d38bb0853589c939573ea50e9cb64f733e0e273d Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Sun, 9 Jan 2022 17:48:58 +0100 Subject: [PATCH 483/493] random: cleanup integer types Rather than using the userspace type, __uXX, switch to using uXX. And rather than using variously chosen `char *` or `unsigned char *`, use `u8 *` uniformly for things that aren't strings, in the case where we are doing byte-by-byte traversal. Reviewed-by: Dominik Brodowski Signed-off-by: Jason A. Donenfeld --- drivers/char/random.c | 105 +++++++++++++++++++++--------------------- 1 file changed, 52 insertions(+), 53 deletions(-) diff --git a/drivers/char/random.c b/drivers/char/random.c index 349beb318757..8b7567b5a6e6 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -456,7 +456,7 @@ static DEFINE_SPINLOCK(random_ready_list_lock); static LIST_HEAD(random_ready_list); struct crng_state { - __u32 state[16]; + u32 state[16]; unsigned long init_time; spinlock_t lock; }; @@ -483,9 +483,9 @@ static bool crng_need_final_init = false; static int crng_init_cnt = 0; static unsigned long crng_global_init_time = 0; #define CRNG_INIT_CNT_THRESH (2*CHACHA_KEY_SIZE) -static void _extract_crng(struct crng_state *crng, __u8 out[CHACHA_BLOCK_SIZE]); +static void _extract_crng(struct crng_state *crng, u8 out[CHACHA_BLOCK_SIZE]); static void _crng_backtrack_protect(struct crng_state *crng, - __u8 tmp[CHACHA_BLOCK_SIZE], int used); + u8 tmp[CHACHA_BLOCK_SIZE], int used); static void process_random_ready_list(void); static void _get_random_bytes(void *buf, int nbytes); @@ -509,16 +509,16 @@ MODULE_PARM_DESC(ratelimit_disable, "Disable random ratelimit suppression"); struct entropy_store; struct entropy_store { /* read-only data: */ - __u32 *pool; + u32 *pool; const char *name; /* read-write data: */ spinlock_t lock; - unsigned short add_ptr; - unsigned short input_rotate; + u16 add_ptr; + u16 input_rotate; int entropy_count; unsigned int last_data_init:1; - __u8 last_data[EXTRACT_SIZE]; + u8 last_data[EXTRACT_SIZE]; }; static ssize_t extract_entropy(struct entropy_store *r, void *buf, @@ -527,7 +527,7 @@ static ssize_t _extract_entropy(struct entropy_store *r, void *buf, size_t nbytes, int fips); static void crng_reseed(struct crng_state *crng, struct entropy_store *r); -static __u32 input_pool_data[INPUT_POOL_WORDS] __latent_entropy; +static u32 input_pool_data[INPUT_POOL_WORDS] __latent_entropy; static struct entropy_store input_pool = { .name = "input", @@ -535,7 +535,7 @@ static struct entropy_store input_pool = { .pool = input_pool_data }; -static __u32 const twist_table[8] = { +static u32 const twist_table[8] = { 0x00000000, 0x3b6e20c8, 0x76dc4190, 0x4db26158, 0xedb88320, 0xd6d6a3e8, 0x9b64c2b0, 0xa00ae278 }; @@ -554,8 +554,8 @@ static void _mix_pool_bytes(struct entropy_store *r, const void *in, { unsigned long i; int input_rotate; - const unsigned char *bytes = in; - __u32 w; + const u8 *bytes = in; + u32 w; input_rotate = r->input_rotate; i = r->add_ptr; @@ -608,10 +608,10 @@ static void mix_pool_bytes(struct entropy_store *r, const void *in, } struct fast_pool { - __u32 pool[4]; + u32 pool[4]; unsigned long last; - unsigned short reg_idx; - unsigned char count; + u16 reg_idx; + u8 count; }; /* @@ -621,8 +621,8 @@ struct fast_pool { */ static void fast_mix(struct fast_pool *f) { - __u32 a = f->pool[0], b = f->pool[1]; - __u32 c = f->pool[2], d = f->pool[3]; + u32 a = f->pool[0], b = f->pool[1]; + u32 c = f->pool[2], d = f->pool[3]; a += b; c += d; b = rol32(b, 6); d = rol32(d, 27); @@ -814,14 +814,14 @@ static bool __init crng_init_try_arch_early(struct crng_state *crng) static void crng_initialize_secondary(struct crng_state *crng) { chacha_init_consts(crng->state); - _get_random_bytes(&crng->state[4], sizeof(__u32) * 12); + _get_random_bytes(&crng->state[4], sizeof(u32) * 12); crng_init_try_arch(crng); crng->init_time = jiffies - CRNG_RESEED_INTERVAL - 1; } static void __init crng_initialize_primary(struct crng_state *crng) { - _extract_entropy(&input_pool, &crng->state[4], sizeof(__u32) * 12, 0); + _extract_entropy(&input_pool, &crng->state[4], sizeof(u32) * 12, 0); if (crng_init_try_arch_early(crng) && trust_cpu && crng_init < 2) { invalidate_batched_entropy(); numa_crng_init(); @@ -911,10 +911,10 @@ static struct crng_state *select_crng(void) * path. So we can't afford to dilly-dally. Returns the number of * bytes processed from cp. */ -static size_t crng_fast_load(const char *cp, size_t len) +static size_t crng_fast_load(const u8 *cp, size_t len) { unsigned long flags; - char *p; + u8 *p; size_t ret = 0; if (!spin_trylock_irqsave(&primary_crng.lock, flags)) @@ -923,7 +923,7 @@ static size_t crng_fast_load(const char *cp, size_t len) spin_unlock_irqrestore(&primary_crng.lock, flags); return 0; } - p = (unsigned char *) &primary_crng.state[4]; + p = (u8 *) &primary_crng.state[4]; while (len > 0 && crng_init_cnt < CRNG_INIT_CNT_THRESH) { p[crng_init_cnt % CHACHA_KEY_SIZE] ^= *cp; cp++; crng_init_cnt++; len--; ret++; @@ -951,14 +951,14 @@ static size_t crng_fast_load(const char *cp, size_t len) * like a fixed DMI table (for example), which might very well be * unique to the machine, but is otherwise unvarying. */ -static int crng_slow_load(const char *cp, size_t len) +static int crng_slow_load(const u8 *cp, size_t len) { unsigned long flags; - static unsigned char lfsr = 1; - unsigned char tmp; - unsigned i, max = CHACHA_KEY_SIZE; - const char * src_buf = cp; - char * dest_buf = (char *) &primary_crng.state[4]; + static u8 lfsr = 1; + u8 tmp; + unsigned int i, max = CHACHA_KEY_SIZE; + const u8 * src_buf = cp; + u8 * dest_buf = (u8 *) &primary_crng.state[4]; if (!spin_trylock_irqsave(&primary_crng.lock, flags)) return 0; @@ -987,8 +987,8 @@ static void crng_reseed(struct crng_state *crng, struct entropy_store *r) unsigned long flags; int i, num; union { - __u8 block[CHACHA_BLOCK_SIZE]; - __u32 key[8]; + u8 block[CHACHA_BLOCK_SIZE]; + u32 key[8]; } buf; if (r) { @@ -1015,7 +1015,7 @@ static void crng_reseed(struct crng_state *crng, struct entropy_store *r) } static void _extract_crng(struct crng_state *crng, - __u8 out[CHACHA_BLOCK_SIZE]) + u8 out[CHACHA_BLOCK_SIZE]) { unsigned long flags, init_time; @@ -1033,7 +1033,7 @@ static void _extract_crng(struct crng_state *crng, spin_unlock_irqrestore(&crng->lock, flags); } -static void extract_crng(__u8 out[CHACHA_BLOCK_SIZE]) +static void extract_crng(u8 out[CHACHA_BLOCK_SIZE]) { _extract_crng(select_crng(), out); } @@ -1043,26 +1043,26 @@ static void extract_crng(__u8 out[CHACHA_BLOCK_SIZE]) * enough) to mutate the CRNG key to provide backtracking protection. */ static void _crng_backtrack_protect(struct crng_state *crng, - __u8 tmp[CHACHA_BLOCK_SIZE], int used) + u8 tmp[CHACHA_BLOCK_SIZE], int used) { unsigned long flags; - __u32 *s, *d; + u32 *s, *d; int i; - used = round_up(used, sizeof(__u32)); + used = round_up(used, sizeof(u32)); if (used + CHACHA_KEY_SIZE > CHACHA_BLOCK_SIZE) { extract_crng(tmp); used = 0; } spin_lock_irqsave(&crng->lock, flags); - s = (__u32 *) &tmp[used]; + s = (u32 *) &tmp[used]; d = &crng->state[4]; for (i=0; i < 8; i++) *d++ ^= *s++; spin_unlock_irqrestore(&crng->lock, flags); } -static void crng_backtrack_protect(__u8 tmp[CHACHA_BLOCK_SIZE], int used) +static void crng_backtrack_protect(u8 tmp[CHACHA_BLOCK_SIZE], int used) { _crng_backtrack_protect(select_crng(), tmp, used); } @@ -1070,7 +1070,7 @@ static void crng_backtrack_protect(__u8 tmp[CHACHA_BLOCK_SIZE], int used) static ssize_t extract_crng_user(void __user *buf, size_t nbytes) { ssize_t ret = 0, i = CHACHA_BLOCK_SIZE; - __u8 tmp[CHACHA_BLOCK_SIZE] __aligned(4); + u8 tmp[CHACHA_BLOCK_SIZE] __aligned(4); int large_request = (nbytes > 256); while (nbytes) { @@ -1158,8 +1158,8 @@ static void add_timer_randomness(struct timer_rand_state *state, unsigned num) struct entropy_store *r; struct { long jiffies; - unsigned cycles; - unsigned num; + unsigned int cycles; + unsigned int num; } sample; long delta, delta2, delta3; @@ -1241,15 +1241,15 @@ static void add_interrupt_bench(cycles_t start) #define add_interrupt_bench(x) #endif -static __u32 get_reg(struct fast_pool *f, struct pt_regs *regs) +static u32 get_reg(struct fast_pool *f, struct pt_regs *regs) { - __u32 *ptr = (__u32 *) regs; + u32 *ptr = (u32 *) regs; unsigned int idx; if (regs == NULL) return 0; idx = READ_ONCE(f->reg_idx); - if (idx >= sizeof(struct pt_regs) / sizeof(__u32)) + if (idx >= sizeof(struct pt_regs) / sizeof(u32)) idx = 0; ptr += idx++; WRITE_ONCE(f->reg_idx, idx); @@ -1263,8 +1263,8 @@ void add_interrupt_randomness(int irq) struct pt_regs *regs = get_irq_regs(); unsigned long now = jiffies; cycles_t cycles = random_get_entropy(); - __u32 c_high, j_high; - __u64 ip; + u32 c_high, j_high; + u64 ip; if (cycles == 0) cycles = get_reg(fast_pool, regs); @@ -1282,8 +1282,7 @@ void add_interrupt_randomness(int irq) if (unlikely(crng_init == 0)) { if ((fast_pool->count >= 64) && - crng_fast_load((char *) fast_pool->pool, - sizeof(fast_pool->pool)) > 0) { + crng_fast_load((u8 *)fast_pool->pool, sizeof(fast_pool->pool)) > 0) { fast_pool->count = 0; fast_pool->last = now; } @@ -1380,7 +1379,7 @@ retry: * * Note: we assume that .poolwords is a multiple of 16 words. */ -static void extract_buf(struct entropy_store *r, __u8 *out) +static void extract_buf(struct entropy_store *r, u8 *out) { struct blake2s_state state __aligned(__alignof__(unsigned long)); u8 hash[BLAKE2S_HASH_SIZE]; @@ -1430,7 +1429,7 @@ static ssize_t _extract_entropy(struct entropy_store *r, void *buf, size_t nbytes, int fips) { ssize_t ret = 0, i; - __u8 tmp[EXTRACT_SIZE]; + u8 tmp[EXTRACT_SIZE]; unsigned long flags; while (nbytes) { @@ -1468,7 +1467,7 @@ static ssize_t _extract_entropy(struct entropy_store *r, void *buf, static ssize_t extract_entropy(struct entropy_store *r, void *buf, size_t nbytes, int min, int reserved) { - __u8 tmp[EXTRACT_SIZE]; + u8 tmp[EXTRACT_SIZE]; unsigned long flags; /* if last_data isn't primed, we need EXTRACT_SIZE extra bytes */ @@ -1530,7 +1529,7 @@ static void _warn_unseeded_randomness(const char *func_name, void *caller, */ static void _get_random_bytes(void *buf, int nbytes) { - __u8 tmp[CHACHA_BLOCK_SIZE] __aligned(4); + u8 tmp[CHACHA_BLOCK_SIZE] __aligned(4); trace_get_random_bytes(nbytes, _RET_IP_); @@ -1724,7 +1723,7 @@ EXPORT_SYMBOL(del_random_ready_callback); int __must_check get_random_bytes_arch(void *buf, int nbytes) { int left = nbytes; - char *p = buf; + u8 *p = buf; trace_get_random_bytes_arch(left, _RET_IP_); while (left) { @@ -1866,7 +1865,7 @@ static int write_pool(struct entropy_store *r, const char __user *buffer, size_t count) { size_t bytes; - __u32 t, buf[16]; + u32 t, buf[16]; const char __user *p = buffer; while (count > 0) { @@ -1876,7 +1875,7 @@ write_pool(struct entropy_store *r, const char __user *buffer, size_t count) if (copy_from_user(&buf, p, bytes)) return -EFAULT; - for (b = bytes ; b > 0 ; b -= sizeof(__u32), i++) { + for (b = bytes; b > 0; b -= sizeof(u32), i++) { if (!arch_get_random_int(&t)) break; buf[i] ^= t; From a4bfa9b31802c14ff5847123c12b98d5e36b3985 Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Wed, 12 Jan 2022 15:22:30 +0100 Subject: [PATCH 484/493] random: remove incomplete last_data logic There were a few things added under the "if (fips_enabled)" banner, which never really got completed, and the FIPS people anyway are choosing a different direction. Rather than keep around this halfbaked code, get rid of it so that we can focus on a single design of the RNG rather than two designs. Reviewed-by: Dominik Brodowski Signed-off-by: Jason A. Donenfeld --- drivers/char/random.c | 39 ++++----------------------------------- 1 file changed, 4 insertions(+), 35 deletions(-) diff --git a/drivers/char/random.c b/drivers/char/random.c index 8b7567b5a6e6..07354e228341 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -337,7 +337,6 @@ #include #include #include -#include #include #include #include @@ -517,14 +516,12 @@ struct entropy_store { u16 add_ptr; u16 input_rotate; int entropy_count; - unsigned int last_data_init:1; - u8 last_data[EXTRACT_SIZE]; }; static ssize_t extract_entropy(struct entropy_store *r, void *buf, size_t nbytes, int min, int rsvd); static ssize_t _extract_entropy(struct entropy_store *r, void *buf, - size_t nbytes, int fips); + size_t nbytes); static void crng_reseed(struct crng_state *crng, struct entropy_store *r); static u32 input_pool_data[INPUT_POOL_WORDS] __latent_entropy; @@ -821,7 +818,7 @@ static void crng_initialize_secondary(struct crng_state *crng) static void __init crng_initialize_primary(struct crng_state *crng) { - _extract_entropy(&input_pool, &crng->state[4], sizeof(u32) * 12, 0); + _extract_entropy(&input_pool, &crng->state[4], sizeof(u32) * 12); if (crng_init_try_arch_early(crng) && trust_cpu && crng_init < 2) { invalidate_batched_entropy(); numa_crng_init(); @@ -1426,22 +1423,13 @@ static void extract_buf(struct entropy_store *r, u8 *out) } static ssize_t _extract_entropy(struct entropy_store *r, void *buf, - size_t nbytes, int fips) + size_t nbytes) { ssize_t ret = 0, i; u8 tmp[EXTRACT_SIZE]; - unsigned long flags; while (nbytes) { extract_buf(r, tmp); - - if (fips) { - spin_lock_irqsave(&r->lock, flags); - if (!memcmp(tmp, r->last_data, EXTRACT_SIZE)) - panic("Hardware RNG duplicated output!\n"); - memcpy(r->last_data, tmp, EXTRACT_SIZE); - spin_unlock_irqrestore(&r->lock, flags); - } i = min_t(int, nbytes, EXTRACT_SIZE); memcpy(buf, tmp, i); nbytes -= i; @@ -1467,28 +1455,9 @@ static ssize_t _extract_entropy(struct entropy_store *r, void *buf, static ssize_t extract_entropy(struct entropy_store *r, void *buf, size_t nbytes, int min, int reserved) { - u8 tmp[EXTRACT_SIZE]; - unsigned long flags; - - /* if last_data isn't primed, we need EXTRACT_SIZE extra bytes */ - if (fips_enabled) { - spin_lock_irqsave(&r->lock, flags); - if (!r->last_data_init) { - r->last_data_init = 1; - spin_unlock_irqrestore(&r->lock, flags); - trace_extract_entropy(r->name, EXTRACT_SIZE, - ENTROPY_BITS(r), _RET_IP_); - extract_buf(r, tmp); - spin_lock_irqsave(&r->lock, flags); - memcpy(r->last_data, tmp, EXTRACT_SIZE); - } - spin_unlock_irqrestore(&r->lock, flags); - } - trace_extract_entropy(r->name, nbytes, ENTROPY_BITS(r), _RET_IP_); nbytes = account(r, nbytes, min, reserved); - - return _extract_entropy(r, buf, nbytes, fips_enabled); + return _extract_entropy(r, buf, nbytes); } #define warn_unseeded_randomness(previous) \ From 8b2d953b91e7f60200c24067ab17b77cc7bfd0d4 Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Wed, 12 Jan 2022 15:28:21 +0100 Subject: [PATCH 485/493] random: remove unused extract_entropy() reserved argument This argument is always set to zero, as a result of us not caring about keeping a certain amount reserved in the pool these days. So just remove it and cleanup the function signatures. Reviewed-by: Dominik Brodowski Signed-off-by: Jason A. Donenfeld --- drivers/char/random.c | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) diff --git a/drivers/char/random.c b/drivers/char/random.c index 07354e228341..80734436ed98 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -519,7 +519,7 @@ struct entropy_store { }; static ssize_t extract_entropy(struct entropy_store *r, void *buf, - size_t nbytes, int min, int rsvd); + size_t nbytes, int min); static ssize_t _extract_entropy(struct entropy_store *r, void *buf, size_t nbytes); @@ -989,7 +989,7 @@ static void crng_reseed(struct crng_state *crng, struct entropy_store *r) } buf; if (r) { - num = extract_entropy(r, &buf, 32, 16, 0); + num = extract_entropy(r, &buf, 32, 16); if (num == 0) return; } else { @@ -1327,8 +1327,7 @@ EXPORT_SYMBOL_GPL(add_disk_randomness); * This function decides how many bytes to actually take from the * given pool, and also debits the entropy count accordingly. */ -static size_t account(struct entropy_store *r, size_t nbytes, int min, - int reserved) +static size_t account(struct entropy_store *r, size_t nbytes, int min) { int entropy_count, orig, have_bytes; size_t ibytes, nfrac; @@ -1342,7 +1341,7 @@ retry: /* never pull more than available */ have_bytes = entropy_count >> (ENTROPY_SHIFT + 3); - if ((have_bytes -= reserved) < 0) + if (have_bytes < 0) have_bytes = 0; ibytes = min_t(size_t, ibytes, have_bytes); if (ibytes < min) @@ -1448,15 +1447,13 @@ static ssize_t _extract_entropy(struct entropy_store *r, void *buf, * returns it in a buffer. * * The min parameter specifies the minimum amount we can pull before - * failing to avoid races that defeat catastrophic reseeding while the - * reserved parameter indicates how much entropy we must leave in the - * pool after each pull to avoid starving other readers. + * failing to avoid races that defeat catastrophic reseeding. */ static ssize_t extract_entropy(struct entropy_store *r, void *buf, - size_t nbytes, int min, int reserved) + size_t nbytes, int min) { trace_extract_entropy(r->name, nbytes, ENTROPY_BITS(r), _RET_IP_); - nbytes = account(r, nbytes, min, reserved); + nbytes = account(r, nbytes, min); return _extract_entropy(r, buf, nbytes); } From 90ed1e67e896cc8040a523f8428fc02f9b164394 Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Wed, 12 Jan 2022 17:18:08 +0100 Subject: [PATCH 486/493] random: rather than entropy_store abstraction, use global Originally, the RNG used several pools, so having things abstracted out over a generic entropy_store object made sense. These days, there's only one input pool, and then an uneven mix of usage via the abstraction and usage via &input_pool. Rather than this uneasy mixture, just get rid of the abstraction entirely and have things always use the global. This simplifies the code and makes reading it a bit easier. Reviewed-by: Dominik Brodowski Signed-off-by: Jason A. Donenfeld --- drivers/char/random.c | 219 +++++++++++++++------------------- include/trace/events/random.h | 56 ++++----- 2 files changed, 117 insertions(+), 158 deletions(-) diff --git a/drivers/char/random.c b/drivers/char/random.c index 80734436ed98..40de9f41d4db 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -375,7 +375,7 @@ * credit_entropy_bits() needs to be 64 bits wide. */ #define ENTROPY_SHIFT 3 -#define ENTROPY_BITS(r) ((r)->entropy_count >> ENTROPY_SHIFT) +#define ENTROPY_BITS() (input_pool.entropy_count >> ENTROPY_SHIFT) /* * If the entropy count falls under this number of bits, then we @@ -505,33 +505,27 @@ MODULE_PARM_DESC(ratelimit_disable, "Disable random ratelimit suppression"); * **********************************************************************/ -struct entropy_store; -struct entropy_store { +static u32 input_pool_data[INPUT_POOL_WORDS] __latent_entropy; + +static struct { /* read-only data: */ u32 *pool; - const char *name; /* read-write data: */ spinlock_t lock; u16 add_ptr; u16 input_rotate; int entropy_count; -}; - -static ssize_t extract_entropy(struct entropy_store *r, void *buf, - size_t nbytes, int min); -static ssize_t _extract_entropy(struct entropy_store *r, void *buf, - size_t nbytes); - -static void crng_reseed(struct crng_state *crng, struct entropy_store *r); -static u32 input_pool_data[INPUT_POOL_WORDS] __latent_entropy; - -static struct entropy_store input_pool = { - .name = "input", +} input_pool = { .lock = __SPIN_LOCK_UNLOCKED(input_pool.lock), .pool = input_pool_data }; +static ssize_t extract_entropy(void *buf, size_t nbytes, int min); +static ssize_t _extract_entropy(void *buf, size_t nbytes); + +static void crng_reseed(struct crng_state *crng, bool use_input_pool); + static u32 const twist_table[8] = { 0x00000000, 0x3b6e20c8, 0x76dc4190, 0x4db26158, 0xedb88320, 0xd6d6a3e8, 0x9b64c2b0, 0xa00ae278 }; @@ -546,16 +540,15 @@ static u32 const twist_table[8] = { * it's cheap to do so and helps slightly in the expected case where * the entropy is concentrated in the low-order bits. */ -static void _mix_pool_bytes(struct entropy_store *r, const void *in, - int nbytes) +static void _mix_pool_bytes(const void *in, int nbytes) { unsigned long i; int input_rotate; const u8 *bytes = in; u32 w; - input_rotate = r->input_rotate; - i = r->add_ptr; + input_rotate = input_pool.input_rotate; + i = input_pool.add_ptr; /* mix one byte at a time to simplify size handling and churn faster */ while (nbytes--) { @@ -563,15 +556,15 @@ static void _mix_pool_bytes(struct entropy_store *r, const void *in, i = (i - 1) & POOL_WORDMASK; /* XOR in the various taps */ - w ^= r->pool[i]; - w ^= r->pool[(i + POOL_TAP1) & POOL_WORDMASK]; - w ^= r->pool[(i + POOL_TAP2) & POOL_WORDMASK]; - w ^= r->pool[(i + POOL_TAP3) & POOL_WORDMASK]; - w ^= r->pool[(i + POOL_TAP4) & POOL_WORDMASK]; - w ^= r->pool[(i + POOL_TAP5) & POOL_WORDMASK]; + w ^= input_pool.pool[i]; + w ^= input_pool.pool[(i + POOL_TAP1) & POOL_WORDMASK]; + w ^= input_pool.pool[(i + POOL_TAP2) & POOL_WORDMASK]; + w ^= input_pool.pool[(i + POOL_TAP3) & POOL_WORDMASK]; + w ^= input_pool.pool[(i + POOL_TAP4) & POOL_WORDMASK]; + w ^= input_pool.pool[(i + POOL_TAP5) & POOL_WORDMASK]; /* Mix the result back in with a twist */ - r->pool[i] = (w >> 3) ^ twist_table[w & 7]; + input_pool.pool[i] = (w >> 3) ^ twist_table[w & 7]; /* * Normally, we add 7 bits of rotation to the pool. @@ -582,26 +575,24 @@ static void _mix_pool_bytes(struct entropy_store *r, const void *in, input_rotate = (input_rotate + (i ? 7 : 14)) & 31; } - r->input_rotate = input_rotate; - r->add_ptr = i; + input_pool.input_rotate = input_rotate; + input_pool.add_ptr = i; } -static void __mix_pool_bytes(struct entropy_store *r, const void *in, - int nbytes) +static void __mix_pool_bytes(const void *in, int nbytes) { - trace_mix_pool_bytes_nolock(r->name, nbytes, _RET_IP_); - _mix_pool_bytes(r, in, nbytes); + trace_mix_pool_bytes_nolock(nbytes, _RET_IP_); + _mix_pool_bytes(in, nbytes); } -static void mix_pool_bytes(struct entropy_store *r, const void *in, - int nbytes) +static void mix_pool_bytes(const void *in, int nbytes) { unsigned long flags; - trace_mix_pool_bytes(r->name, nbytes, _RET_IP_); - spin_lock_irqsave(&r->lock, flags); - _mix_pool_bytes(r, in, nbytes); - spin_unlock_irqrestore(&r->lock, flags); + trace_mix_pool_bytes(nbytes, _RET_IP_); + spin_lock_irqsave(&input_pool.lock, flags); + _mix_pool_bytes(in, nbytes); + spin_unlock_irqrestore(&input_pool.lock, flags); } struct fast_pool { @@ -663,16 +654,16 @@ static void process_random_ready_list(void) * Use credit_entropy_bits_safe() if the value comes from userspace * or otherwise should be checked for extreme values. */ -static void credit_entropy_bits(struct entropy_store *r, int nbits) +static void credit_entropy_bits(int nbits) { - int entropy_count, orig; + int entropy_count, entropy_bits, orig; int nfrac = nbits << ENTROPY_SHIFT; if (!nbits) return; retry: - entropy_count = orig = READ_ONCE(r->entropy_count); + entropy_count = orig = READ_ONCE(input_pool.entropy_count); if (nfrac < 0) { /* Debit */ entropy_count += nfrac; @@ -713,26 +704,21 @@ retry: } if (WARN_ON(entropy_count < 0)) { - pr_warn("negative entropy/overflow: pool %s count %d\n", - r->name, entropy_count); + pr_warn("negative entropy/overflow: count %d\n", entropy_count); entropy_count = 0; } else if (entropy_count > POOL_FRACBITS) entropy_count = POOL_FRACBITS; - if (cmpxchg(&r->entropy_count, orig, entropy_count) != orig) + if (cmpxchg(&input_pool.entropy_count, orig, entropy_count) != orig) goto retry; - trace_credit_entropy_bits(r->name, nbits, - entropy_count >> ENTROPY_SHIFT, _RET_IP_); + trace_credit_entropy_bits(nbits, entropy_count >> ENTROPY_SHIFT, _RET_IP_); - if (r == &input_pool) { - int entropy_bits = entropy_count >> ENTROPY_SHIFT; - - if (crng_init < 2 && entropy_bits >= 128) - crng_reseed(&primary_crng, r); - } + entropy_bits = entropy_count >> ENTROPY_SHIFT; + if (crng_init < 2 && entropy_bits >= 128) + crng_reseed(&primary_crng, true); } -static int credit_entropy_bits_safe(struct entropy_store *r, int nbits) +static int credit_entropy_bits_safe(int nbits) { if (nbits < 0) return -EINVAL; @@ -740,7 +726,7 @@ static int credit_entropy_bits_safe(struct entropy_store *r, int nbits) /* Cap the value to avoid overflows */ nbits = min(nbits, POOL_BITS); - credit_entropy_bits(r, nbits); + credit_entropy_bits(nbits); return 0; } @@ -818,7 +804,7 @@ static void crng_initialize_secondary(struct crng_state *crng) static void __init crng_initialize_primary(struct crng_state *crng) { - _extract_entropy(&input_pool, &crng->state[4], sizeof(u32) * 12); + _extract_entropy(&crng->state[4], sizeof(u32) * 12); if (crng_init_try_arch_early(crng) && trust_cpu && crng_init < 2) { invalidate_batched_entropy(); numa_crng_init(); @@ -979,7 +965,7 @@ static int crng_slow_load(const u8 *cp, size_t len) return 1; } -static void crng_reseed(struct crng_state *crng, struct entropy_store *r) +static void crng_reseed(struct crng_state *crng, bool use_input_pool) { unsigned long flags; int i, num; @@ -988,8 +974,8 @@ static void crng_reseed(struct crng_state *crng, struct entropy_store *r) u32 key[8]; } buf; - if (r) { - num = extract_entropy(r, &buf, 32, 16); + if (use_input_pool) { + num = extract_entropy(&buf, 32, 16); if (num == 0) return; } else { @@ -1020,8 +1006,7 @@ static void _extract_crng(struct crng_state *crng, init_time = READ_ONCE(crng->init_time); if (time_after(READ_ONCE(crng_global_init_time), init_time) || time_after(jiffies, init_time + CRNG_RESEED_INTERVAL)) - crng_reseed(crng, crng == &primary_crng ? - &input_pool : NULL); + crng_reseed(crng, crng == &primary_crng); } spin_lock_irqsave(&crng->lock, flags); chacha20_block(&crng->state[0], out); @@ -1132,8 +1117,8 @@ void add_device_randomness(const void *buf, unsigned int size) trace_add_device_randomness(size, _RET_IP_); spin_lock_irqsave(&input_pool.lock, flags); - _mix_pool_bytes(&input_pool, buf, size); - _mix_pool_bytes(&input_pool, &time, sizeof(time)); + _mix_pool_bytes(buf, size); + _mix_pool_bytes(&time, sizeof(time)); spin_unlock_irqrestore(&input_pool.lock, flags); } EXPORT_SYMBOL(add_device_randomness); @@ -1152,7 +1137,6 @@ static struct timer_rand_state input_timer_state = INIT_TIMER_RAND_STATE; */ static void add_timer_randomness(struct timer_rand_state *state, unsigned num) { - struct entropy_store *r; struct { long jiffies; unsigned int cycles; @@ -1163,8 +1147,7 @@ static void add_timer_randomness(struct timer_rand_state *state, unsigned num) sample.jiffies = jiffies; sample.cycles = random_get_entropy(); sample.num = num; - r = &input_pool; - mix_pool_bytes(r, &sample, sizeof(sample)); + mix_pool_bytes(&sample, sizeof(sample)); /* * Calculate number of bits of randomness we probably added. @@ -1196,7 +1179,7 @@ static void add_timer_randomness(struct timer_rand_state *state, unsigned num) * Round down by 1 bit on general principles, * and limit entropy estimate to 12 bits. */ - credit_entropy_bits(r, min_t(int, fls(delta>>1), 11)); + credit_entropy_bits(min_t(int, fls(delta>>1), 11)); } void add_input_randomness(unsigned int type, unsigned int code, @@ -1211,7 +1194,7 @@ void add_input_randomness(unsigned int type, unsigned int code, last_value = value; add_timer_randomness(&input_timer_state, (type << 4) ^ code ^ (code >> 4) ^ value); - trace_add_input_randomness(ENTROPY_BITS(&input_pool)); + trace_add_input_randomness(ENTROPY_BITS()); } EXPORT_SYMBOL_GPL(add_input_randomness); @@ -1255,7 +1238,6 @@ static u32 get_reg(struct fast_pool *f, struct pt_regs *regs) void add_interrupt_randomness(int irq) { - struct entropy_store *r; struct fast_pool *fast_pool = this_cpu_ptr(&irq_randomness); struct pt_regs *regs = get_irq_regs(); unsigned long now = jiffies; @@ -1290,18 +1272,17 @@ void add_interrupt_randomness(int irq) !time_after(now, fast_pool->last + HZ)) return; - r = &input_pool; - if (!spin_trylock(&r->lock)) + if (!spin_trylock(&input_pool.lock)) return; fast_pool->last = now; - __mix_pool_bytes(r, &fast_pool->pool, sizeof(fast_pool->pool)); - spin_unlock(&r->lock); + __mix_pool_bytes(&fast_pool->pool, sizeof(fast_pool->pool)); + spin_unlock(&input_pool.lock); fast_pool->count = 0; /* award one bit for the contents of the fast pool */ - credit_entropy_bits(r, 1); + credit_entropy_bits(1); } EXPORT_SYMBOL_GPL(add_interrupt_randomness); @@ -1312,7 +1293,7 @@ void add_disk_randomness(struct gendisk *disk) return; /* first major is 1, so we get >= 0x200 here */ add_timer_randomness(disk->random, 0x100 + disk_devt(disk)); - trace_add_disk_randomness(disk_devt(disk), ENTROPY_BITS(&input_pool)); + trace_add_disk_randomness(disk_devt(disk), ENTROPY_BITS()); } EXPORT_SYMBOL_GPL(add_disk_randomness); #endif @@ -1327,16 +1308,16 @@ EXPORT_SYMBOL_GPL(add_disk_randomness); * This function decides how many bytes to actually take from the * given pool, and also debits the entropy count accordingly. */ -static size_t account(struct entropy_store *r, size_t nbytes, int min) +static size_t account(size_t nbytes, int min) { int entropy_count, orig, have_bytes; size_t ibytes, nfrac; - BUG_ON(r->entropy_count > POOL_FRACBITS); + BUG_ON(input_pool.entropy_count > POOL_FRACBITS); /* Can we pull enough? */ retry: - entropy_count = orig = READ_ONCE(r->entropy_count); + entropy_count = orig = READ_ONCE(input_pool.entropy_count); ibytes = nbytes; /* never pull more than available */ have_bytes = entropy_count >> (ENTROPY_SHIFT + 3); @@ -1348,8 +1329,7 @@ retry: ibytes = 0; if (WARN_ON(entropy_count < 0)) { - pr_warn("negative entropy count: pool %s count %d\n", - r->name, entropy_count); + pr_warn("negative entropy count: count %d\n", entropy_count); entropy_count = 0; } nfrac = ibytes << (ENTROPY_SHIFT + 3); @@ -1358,11 +1338,11 @@ retry: else entropy_count = 0; - if (cmpxchg(&r->entropy_count, orig, entropy_count) != orig) + if (cmpxchg(&input_pool.entropy_count, orig, entropy_count) != orig) goto retry; - trace_debit_entropy(r->name, 8 * ibytes); - if (ibytes && ENTROPY_BITS(r) < random_write_wakeup_bits) { + trace_debit_entropy(8 * ibytes); + if (ibytes && ENTROPY_BITS() < random_write_wakeup_bits) { wake_up_interruptible(&random_write_wait); kill_fasync(&fasync, SIGIO, POLL_OUT); } @@ -1375,7 +1355,7 @@ retry: * * Note: we assume that .poolwords is a multiple of 16 words. */ -static void extract_buf(struct entropy_store *r, u8 *out) +static void extract_buf(u8 *out) { struct blake2s_state state __aligned(__alignof__(unsigned long)); u8 hash[BLAKE2S_HASH_SIZE]; @@ -1397,8 +1377,8 @@ static void extract_buf(struct entropy_store *r, u8 *out) } /* Generate a hash across the pool */ - spin_lock_irqsave(&r->lock, flags); - blake2s_update(&state, (const u8 *)r->pool, POOL_BYTES); + spin_lock_irqsave(&input_pool.lock, flags); + blake2s_update(&state, (const u8 *)input_pool.pool, POOL_BYTES); blake2s_final(&state, hash); /* final zeros out state */ /* @@ -1410,8 +1390,8 @@ static void extract_buf(struct entropy_store *r, u8 *out) * brute-forcing the feedback as hard as brute-forcing the * hash. */ - __mix_pool_bytes(r, hash, sizeof(hash)); - spin_unlock_irqrestore(&r->lock, flags); + __mix_pool_bytes(hash, sizeof(hash)); + spin_unlock_irqrestore(&input_pool.lock, flags); /* Note that EXTRACT_SIZE is half of hash size here, because above * we've dumped the full length back into mixer. By reducing the @@ -1421,14 +1401,13 @@ static void extract_buf(struct entropy_store *r, u8 *out) memzero_explicit(hash, sizeof(hash)); } -static ssize_t _extract_entropy(struct entropy_store *r, void *buf, - size_t nbytes) +static ssize_t _extract_entropy(void *buf, size_t nbytes) { ssize_t ret = 0, i; u8 tmp[EXTRACT_SIZE]; while (nbytes) { - extract_buf(r, tmp); + extract_buf(tmp); i = min_t(int, nbytes, EXTRACT_SIZE); memcpy(buf, tmp, i); nbytes -= i; @@ -1449,12 +1428,11 @@ static ssize_t _extract_entropy(struct entropy_store *r, void *buf, * The min parameter specifies the minimum amount we can pull before * failing to avoid races that defeat catastrophic reseeding. */ -static ssize_t extract_entropy(struct entropy_store *r, void *buf, - size_t nbytes, int min) +static ssize_t extract_entropy(void *buf, size_t nbytes, int min) { - trace_extract_entropy(r->name, nbytes, ENTROPY_BITS(r), _RET_IP_); - nbytes = account(r, nbytes, min); - return _extract_entropy(r, buf, nbytes); + trace_extract_entropy(nbytes, ENTROPY_BITS(), _RET_IP_); + nbytes = account(nbytes, min); + return _extract_entropy(buf, nbytes); } #define warn_unseeded_randomness(previous) \ @@ -1539,7 +1517,7 @@ EXPORT_SYMBOL(get_random_bytes); */ static void entropy_timer(struct timer_list *t) { - credit_entropy_bits(&input_pool, 1); + credit_entropy_bits(1); } /* @@ -1563,14 +1541,14 @@ static void try_to_generate_entropy(void) while (!crng_ready()) { if (!timer_pending(&stack.timer)) mod_timer(&stack.timer, jiffies+1); - mix_pool_bytes(&input_pool, &stack.now, sizeof(stack.now)); + mix_pool_bytes(&stack.now, sizeof(stack.now)); schedule(); stack.now = random_get_entropy(); } del_timer_sync(&stack.timer); destroy_timer_on_stack(&stack.timer); - mix_pool_bytes(&input_pool, &stack.now, sizeof(stack.now)); + mix_pool_bytes(&stack.now, sizeof(stack.now)); } /* @@ -1711,26 +1689,24 @@ EXPORT_SYMBOL(get_random_bytes_arch); /* * init_std_data - initialize pool with system data * - * @r: pool to initialize - * * This function clears the pool's entropy count and mixes some system * data into the pool to prepare it for use. The pool is not cleared * as that can only decrease the entropy in the pool. */ -static void __init init_std_data(struct entropy_store *r) +static void __init init_std_data(void) { int i; ktime_t now = ktime_get_real(); unsigned long rv; - mix_pool_bytes(r, &now, sizeof(now)); + mix_pool_bytes(&now, sizeof(now)); for (i = POOL_BYTES; i > 0; i -= sizeof(rv)) { if (!arch_get_random_seed_long(&rv) && !arch_get_random_long(&rv)) rv = random_get_entropy(); - mix_pool_bytes(r, &rv, sizeof(rv)); + mix_pool_bytes(&rv, sizeof(rv)); } - mix_pool_bytes(r, utsname(), sizeof(*(utsname()))); + mix_pool_bytes(utsname(), sizeof(*(utsname()))); } /* @@ -1745,7 +1721,7 @@ static void __init init_std_data(struct entropy_store *r) */ int __init rand_initialize(void) { - init_std_data(&input_pool); + init_std_data(); if (crng_need_final_init) crng_finalize_init(&primary_crng); crng_initialize_primary(&primary_crng); @@ -1782,7 +1758,7 @@ urandom_read_nowarn(struct file *file, char __user *buf, size_t nbytes, nbytes = min_t(size_t, nbytes, INT_MAX >> (ENTROPY_SHIFT + 3)); ret = extract_crng_user(buf, nbytes); - trace_urandom_read(8 * nbytes, 0, ENTROPY_BITS(&input_pool)); + trace_urandom_read(8 * nbytes, 0, ENTROPY_BITS()); return ret; } @@ -1822,13 +1798,13 @@ random_poll(struct file *file, poll_table * wait) mask = 0; if (crng_ready()) mask |= EPOLLIN | EPOLLRDNORM; - if (ENTROPY_BITS(&input_pool) < random_write_wakeup_bits) + if (ENTROPY_BITS() < random_write_wakeup_bits) mask |= EPOLLOUT | EPOLLWRNORM; return mask; } static int -write_pool(struct entropy_store *r, const char __user *buffer, size_t count) +write_pool(const char __user *buffer, size_t count) { size_t bytes; u32 t, buf[16]; @@ -1850,7 +1826,7 @@ write_pool(struct entropy_store *r, const char __user *buffer, size_t count) count -= bytes; p += bytes; - mix_pool_bytes(r, buf, bytes); + mix_pool_bytes(buf, bytes); cond_resched(); } @@ -1862,7 +1838,7 @@ static ssize_t random_write(struct file *file, const char __user *buffer, { size_t ret; - ret = write_pool(&input_pool, buffer, count); + ret = write_pool(buffer, count); if (ret) return ret; @@ -1878,7 +1854,7 @@ static long random_ioctl(struct file *f, unsigned int cmd, unsigned long arg) switch (cmd) { case RNDGETENTCNT: /* inherently racy, no point locking */ - ent_count = ENTROPY_BITS(&input_pool); + ent_count = ENTROPY_BITS(); if (put_user(ent_count, p)) return -EFAULT; return 0; @@ -1887,7 +1863,7 @@ static long random_ioctl(struct file *f, unsigned int cmd, unsigned long arg) return -EPERM; if (get_user(ent_count, p)) return -EFAULT; - return credit_entropy_bits_safe(&input_pool, ent_count); + return credit_entropy_bits_safe(ent_count); case RNDADDENTROPY: if (!capable(CAP_SYS_ADMIN)) return -EPERM; @@ -1897,11 +1873,10 @@ static long random_ioctl(struct file *f, unsigned int cmd, unsigned long arg) return -EINVAL; if (get_user(size, p++)) return -EFAULT; - retval = write_pool(&input_pool, (const char __user *)p, - size); + retval = write_pool((const char __user *)p, size); if (retval < 0) return retval; - return credit_entropy_bits_safe(&input_pool, ent_count); + return credit_entropy_bits_safe(ent_count); case RNDZAPENTCNT: case RNDCLEARPOOL: /* @@ -1917,7 +1892,7 @@ static long random_ioctl(struct file *f, unsigned int cmd, unsigned long arg) return -EPERM; if (crng_init < 2) return -ENODATA; - crng_reseed(&primary_crng, &input_pool); + crng_reseed(&primary_crng, true); WRITE_ONCE(crng_global_init_time, jiffies - 1); return 0; default: @@ -2241,11 +2216,9 @@ randomize_page(unsigned long start, unsigned long range) void add_hwgenerator_randomness(const char *buffer, size_t count, size_t entropy) { - struct entropy_store *poolp = &input_pool; - if (unlikely(crng_init == 0)) { size_t ret = crng_fast_load(buffer, count); - mix_pool_bytes(poolp, buffer, ret); + mix_pool_bytes(buffer, ret); count -= ret; buffer += ret; if (!count || crng_init == 0) @@ -2258,9 +2231,9 @@ void add_hwgenerator_randomness(const char *buffer, size_t count, */ wait_event_interruptible(random_write_wait, !system_wq || kthread_should_stop() || - ENTROPY_BITS(&input_pool) <= random_write_wakeup_bits); - mix_pool_bytes(poolp, buffer, count); - credit_entropy_bits(poolp, entropy); + ENTROPY_BITS() <= random_write_wakeup_bits); + mix_pool_bytes(buffer, count); + credit_entropy_bits(entropy); } EXPORT_SYMBOL_GPL(add_hwgenerator_randomness); diff --git a/include/trace/events/random.h b/include/trace/events/random.h index 3d7b432ca5f3..a2d9aa16a5d7 100644 --- a/include/trace/events/random.h +++ b/include/trace/events/random.h @@ -28,80 +28,71 @@ TRACE_EVENT(add_device_randomness, ); DECLARE_EVENT_CLASS(random__mix_pool_bytes, - TP_PROTO(const char *pool_name, int bytes, unsigned long IP), + TP_PROTO(int bytes, unsigned long IP), - TP_ARGS(pool_name, bytes, IP), + TP_ARGS(bytes, IP), TP_STRUCT__entry( - __field( const char *, pool_name ) __field( int, bytes ) __field(unsigned long, IP ) ), TP_fast_assign( - __entry->pool_name = pool_name; __entry->bytes = bytes; __entry->IP = IP; ), - TP_printk("%s pool: bytes %d caller %pS", - __entry->pool_name, __entry->bytes, (void *)__entry->IP) + TP_printk("input pool: bytes %d caller %pS", + __entry->bytes, (void *)__entry->IP) ); DEFINE_EVENT(random__mix_pool_bytes, mix_pool_bytes, - TP_PROTO(const char *pool_name, int bytes, unsigned long IP), + TP_PROTO(int bytes, unsigned long IP), - TP_ARGS(pool_name, bytes, IP) + TP_ARGS(bytes, IP) ); DEFINE_EVENT(random__mix_pool_bytes, mix_pool_bytes_nolock, - TP_PROTO(const char *pool_name, int bytes, unsigned long IP), + TP_PROTO(int bytes, unsigned long IP), - TP_ARGS(pool_name, bytes, IP) + TP_ARGS(bytes, IP) ); TRACE_EVENT(credit_entropy_bits, - TP_PROTO(const char *pool_name, int bits, int entropy_count, - unsigned long IP), + TP_PROTO(int bits, int entropy_count, unsigned long IP), - TP_ARGS(pool_name, bits, entropy_count, IP), + TP_ARGS(bits, entropy_count, IP), TP_STRUCT__entry( - __field( const char *, pool_name ) __field( int, bits ) __field( int, entropy_count ) __field(unsigned long, IP ) ), TP_fast_assign( - __entry->pool_name = pool_name; __entry->bits = bits; __entry->entropy_count = entropy_count; __entry->IP = IP; ), - TP_printk("%s pool: bits %d entropy_count %d caller %pS", - __entry->pool_name, __entry->bits, - __entry->entropy_count, (void *)__entry->IP) + TP_printk("input pool: bits %d entropy_count %d caller %pS", + __entry->bits, __entry->entropy_count, (void *)__entry->IP) ); TRACE_EVENT(debit_entropy, - TP_PROTO(const char *pool_name, int debit_bits), + TP_PROTO(int debit_bits), - TP_ARGS(pool_name, debit_bits), + TP_ARGS( debit_bits), TP_STRUCT__entry( - __field( const char *, pool_name ) __field( int, debit_bits ) ), TP_fast_assign( - __entry->pool_name = pool_name; __entry->debit_bits = debit_bits; ), - TP_printk("%s: debit_bits %d", __entry->pool_name, - __entry->debit_bits) + TP_printk("input pool: debit_bits %d", __entry->debit_bits) ); TRACE_EVENT(add_input_randomness, @@ -170,36 +161,31 @@ DEFINE_EVENT(random__get_random_bytes, get_random_bytes_arch, ); DECLARE_EVENT_CLASS(random__extract_entropy, - TP_PROTO(const char *pool_name, int nbytes, int entropy_count, - unsigned long IP), + TP_PROTO(int nbytes, int entropy_count, unsigned long IP), - TP_ARGS(pool_name, nbytes, entropy_count, IP), + TP_ARGS(nbytes, entropy_count, IP), TP_STRUCT__entry( - __field( const char *, pool_name ) __field( int, nbytes ) __field( int, entropy_count ) __field(unsigned long, IP ) ), TP_fast_assign( - __entry->pool_name = pool_name; __entry->nbytes = nbytes; __entry->entropy_count = entropy_count; __entry->IP = IP; ), - TP_printk("%s pool: nbytes %d entropy_count %d caller %pS", - __entry->pool_name, __entry->nbytes, __entry->entropy_count, - (void *)__entry->IP) + TP_printk("input pool: nbytes %d entropy_count %d caller %pS", + __entry->nbytes, __entry->entropy_count, (void *)__entry->IP) ); DEFINE_EVENT(random__extract_entropy, extract_entropy, - TP_PROTO(const char *pool_name, int nbytes, int entropy_count, - unsigned long IP), + TP_PROTO(int nbytes, int entropy_count, unsigned long IP), - TP_ARGS(pool_name, nbytes, entropy_count, IP) + TP_ARGS(nbytes, entropy_count, IP) ); TRACE_EVENT(urandom_read, From 0f63702718c91d89c922081ac1e6baeddc2d8b1a Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Thu, 13 Jan 2022 15:51:06 +0100 Subject: [PATCH 487/493] random: remove unused OUTPUT_POOL constants We no longer have an output pool. Rather, we have just a wakeup bits threshold for /dev/random reads, presumably so that processes don't hang. This value, random_write_wakeup_bits, is configurable anyway. So all the no longer usefully named OUTPUT_POOL constants were doing was setting a reasonable default for random_write_wakeup_bits. This commit gets rid of the constants and just puts it all in the default value of random_write_wakeup_bits. Reviewed-by: Dominik Brodowski Signed-off-by: Jason A. Donenfeld --- drivers/char/random.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/char/random.c b/drivers/char/random.c index 40de9f41d4db..74d43bc8fa48 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -363,8 +363,6 @@ */ #define INPUT_POOL_SHIFT 12 #define INPUT_POOL_WORDS (1 << (INPUT_POOL_SHIFT-5)) -#define OUTPUT_POOL_SHIFT 10 -#define OUTPUT_POOL_WORDS (1 << (OUTPUT_POOL_SHIFT-5)) #define EXTRACT_SIZE (BLAKE2S_HASH_SIZE / 2) /* @@ -382,7 +380,7 @@ * should wake up processes which are selecting or polling on write * access to /dev/random. */ -static int random_write_wakeup_bits = 28 * OUTPUT_POOL_WORDS; +static int random_write_wakeup_bits = 28 * (1 << 5); /* * Originally, we used a primitive polynomial of degree .poolwords From 5b87adf30f1464477169a1d653e9baf8c012bbfe Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Thu, 13 Jan 2022 16:11:21 +0100 Subject: [PATCH 488/493] random: de-duplicate INPUT_POOL constants We already had the POOL_* constants, so deduplicate the older INPUT_POOL ones. As well, fold EXTRACT_SIZE into the poolinfo enum, since it's related. Reviewed-by: Dominik Brodowski Signed-off-by: Jason A. Donenfeld --- drivers/char/random.c | 17 ++++++----------- 1 file changed, 6 insertions(+), 11 deletions(-) diff --git a/drivers/char/random.c b/drivers/char/random.c index 74d43bc8fa48..b494080b99d0 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -358,13 +358,6 @@ /* #define ADD_INTERRUPT_BENCH */ -/* - * Configuration information - */ -#define INPUT_POOL_SHIFT 12 -#define INPUT_POOL_WORDS (1 << (INPUT_POOL_SHIFT-5)) -#define EXTRACT_SIZE (BLAKE2S_HASH_SIZE / 2) - /* * To allow fractional bits to be tracked, the entropy_count field is * denominated in units of 1/8th bits. @@ -440,7 +433,9 @@ enum poolinfo { POOL_TAP2 = 76, POOL_TAP3 = 51, POOL_TAP4 = 25, - POOL_TAP5 = 1 + POOL_TAP5 = 1, + + EXTRACT_SIZE = BLAKE2S_HASH_SIZE / 2 }; /* @@ -503,7 +498,7 @@ MODULE_PARM_DESC(ratelimit_disable, "Disable random ratelimit suppression"); * **********************************************************************/ -static u32 input_pool_data[INPUT_POOL_WORDS] __latent_entropy; +static u32 input_pool_data[POOL_WORDS] __latent_entropy; static struct { /* read-only data: */ @@ -1961,7 +1956,7 @@ SYSCALL_DEFINE3(getrandom, char __user *, buf, size_t, count, #include static int min_write_thresh; -static int max_write_thresh = INPUT_POOL_WORDS * 32; +static int max_write_thresh = POOL_BITS; static int random_min_urandom_seed = 60; static char sysctl_bootid[16]; @@ -2018,7 +2013,7 @@ static int proc_do_entropy(struct ctl_table *table, int write, return proc_dointvec(&fake_table, write, buffer, lenp, ppos); } -static int sysctl_poolsize = INPUT_POOL_WORDS * 32; +static int sysctl_poolsize = POOL_BITS; extern struct ctl_table random_table[]; struct ctl_table random_table[] = { { From b3d51c1f542113342ddfbf6007e38a684b9dbec9 Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Fri, 14 Jan 2022 16:48:35 +0100 Subject: [PATCH 489/493] random: prepend remaining pool constants with POOL_ The other pool constants are prepended with POOL_, but not these last ones. Rename them. This will then let us move them into the enum in the following commit. Reviewed-by: Dominik Brodowski Signed-off-by: Jason A. Donenfeld --- drivers/char/random.c | 40 ++++++++++++++++++++-------------------- 1 file changed, 20 insertions(+), 20 deletions(-) diff --git a/drivers/char/random.c b/drivers/char/random.c index b494080b99d0..3b7e1ebfd30d 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -362,11 +362,11 @@ * To allow fractional bits to be tracked, the entropy_count field is * denominated in units of 1/8th bits. * - * 2*(ENTROPY_SHIFT + poolbitshift) must <= 31, or the multiply in + * 2*(POOL_ENTROPY_SHIFT + poolbitshift) must <= 31, or the multiply in * credit_entropy_bits() needs to be 64 bits wide. */ -#define ENTROPY_SHIFT 3 -#define ENTROPY_BITS() (input_pool.entropy_count >> ENTROPY_SHIFT) +#define POOL_ENTROPY_SHIFT 3 +#define POOL_ENTROPY_BITS() (input_pool.entropy_count >> POOL_ENTROPY_SHIFT) /* * If the entropy count falls under this number of bits, then we @@ -426,7 +426,7 @@ enum poolinfo { POOL_BYTES = POOL_WORDS * sizeof(u32), POOL_BITS = POOL_BYTES * 8, POOL_BITSHIFT = ilog2(POOL_WORDS) + 5, - POOL_FRACBITS = POOL_WORDS << (ENTROPY_SHIFT + 5), + POOL_FRACBITS = POOL_WORDS << (POOL_ENTROPY_SHIFT + 5), /* x^128 + x^104 + x^76 + x^51 +x^25 + x + 1 */ POOL_TAP1 = 104, @@ -650,7 +650,7 @@ static void process_random_ready_list(void) static void credit_entropy_bits(int nbits) { int entropy_count, entropy_bits, orig; - int nfrac = nbits << ENTROPY_SHIFT; + int nfrac = nbits << POOL_ENTROPY_SHIFT; if (!nbits) return; @@ -683,7 +683,7 @@ retry: * turns no matter how large nbits is. */ int pnfrac = nfrac; - const int s = POOL_BITSHIFT + ENTROPY_SHIFT + 2; + const int s = POOL_BITSHIFT + POOL_ENTROPY_SHIFT + 2; /* The +2 corresponds to the /4 in the denominator */ do { @@ -704,9 +704,9 @@ retry: if (cmpxchg(&input_pool.entropy_count, orig, entropy_count) != orig) goto retry; - trace_credit_entropy_bits(nbits, entropy_count >> ENTROPY_SHIFT, _RET_IP_); + trace_credit_entropy_bits(nbits, entropy_count >> POOL_ENTROPY_SHIFT, _RET_IP_); - entropy_bits = entropy_count >> ENTROPY_SHIFT; + entropy_bits = entropy_count >> POOL_ENTROPY_SHIFT; if (crng_init < 2 && entropy_bits >= 128) crng_reseed(&primary_crng, true); } @@ -1187,7 +1187,7 @@ void add_input_randomness(unsigned int type, unsigned int code, last_value = value; add_timer_randomness(&input_timer_state, (type << 4) ^ code ^ (code >> 4) ^ value); - trace_add_input_randomness(ENTROPY_BITS()); + trace_add_input_randomness(POOL_ENTROPY_BITS()); } EXPORT_SYMBOL_GPL(add_input_randomness); @@ -1286,7 +1286,7 @@ void add_disk_randomness(struct gendisk *disk) return; /* first major is 1, so we get >= 0x200 here */ add_timer_randomness(disk->random, 0x100 + disk_devt(disk)); - trace_add_disk_randomness(disk_devt(disk), ENTROPY_BITS()); + trace_add_disk_randomness(disk_devt(disk), POOL_ENTROPY_BITS()); } EXPORT_SYMBOL_GPL(add_disk_randomness); #endif @@ -1313,7 +1313,7 @@ retry: entropy_count = orig = READ_ONCE(input_pool.entropy_count); ibytes = nbytes; /* never pull more than available */ - have_bytes = entropy_count >> (ENTROPY_SHIFT + 3); + have_bytes = entropy_count >> (POOL_ENTROPY_SHIFT + 3); if (have_bytes < 0) have_bytes = 0; @@ -1325,7 +1325,7 @@ retry: pr_warn("negative entropy count: count %d\n", entropy_count); entropy_count = 0; } - nfrac = ibytes << (ENTROPY_SHIFT + 3); + nfrac = ibytes << (POOL_ENTROPY_SHIFT + 3); if ((size_t) entropy_count > nfrac) entropy_count -= nfrac; else @@ -1335,7 +1335,7 @@ retry: goto retry; trace_debit_entropy(8 * ibytes); - if (ibytes && ENTROPY_BITS() < random_write_wakeup_bits) { + if (ibytes && POOL_ENTROPY_BITS() < random_write_wakeup_bits) { wake_up_interruptible(&random_write_wait); kill_fasync(&fasync, SIGIO, POLL_OUT); } @@ -1423,7 +1423,7 @@ static ssize_t _extract_entropy(void *buf, size_t nbytes) */ static ssize_t extract_entropy(void *buf, size_t nbytes, int min) { - trace_extract_entropy(nbytes, ENTROPY_BITS(), _RET_IP_); + trace_extract_entropy(nbytes, POOL_ENTROPY_BITS(), _RET_IP_); nbytes = account(nbytes, min); return _extract_entropy(buf, nbytes); } @@ -1749,9 +1749,9 @@ urandom_read_nowarn(struct file *file, char __user *buf, size_t nbytes, { int ret; - nbytes = min_t(size_t, nbytes, INT_MAX >> (ENTROPY_SHIFT + 3)); + nbytes = min_t(size_t, nbytes, INT_MAX >> (POOL_ENTROPY_SHIFT + 3)); ret = extract_crng_user(buf, nbytes); - trace_urandom_read(8 * nbytes, 0, ENTROPY_BITS()); + trace_urandom_read(8 * nbytes, 0, POOL_ENTROPY_BITS()); return ret; } @@ -1791,7 +1791,7 @@ random_poll(struct file *file, poll_table * wait) mask = 0; if (crng_ready()) mask |= EPOLLIN | EPOLLRDNORM; - if (ENTROPY_BITS() < random_write_wakeup_bits) + if (POOL_ENTROPY_BITS() < random_write_wakeup_bits) mask |= EPOLLOUT | EPOLLWRNORM; return mask; } @@ -1847,7 +1847,7 @@ static long random_ioctl(struct file *f, unsigned int cmd, unsigned long arg) switch (cmd) { case RNDGETENTCNT: /* inherently racy, no point locking */ - ent_count = ENTROPY_BITS(); + ent_count = POOL_ENTROPY_BITS(); if (put_user(ent_count, p)) return -EFAULT; return 0; @@ -2005,7 +2005,7 @@ static int proc_do_entropy(struct ctl_table *table, int write, struct ctl_table fake_table; int entropy_count; - entropy_count = *(int *)table->data >> ENTROPY_SHIFT; + entropy_count = *(int *)table->data >> POOL_ENTROPY_SHIFT; fake_table.data = &entropy_count; fake_table.maxlen = sizeof(entropy_count); @@ -2224,7 +2224,7 @@ void add_hwgenerator_randomness(const char *buffer, size_t count, */ wait_event_interruptible(random_write_wait, !system_wq || kthread_should_stop() || - ENTROPY_BITS() <= random_write_wakeup_bits); + POOL_ENTROPY_BITS() <= random_write_wakeup_bits); mix_pool_bytes(buffer, count); credit_entropy_bits(entropy); } From 18263c4e8e62f7329f38f5eadc568751242ca89c Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Thu, 13 Jan 2022 18:18:48 +0100 Subject: [PATCH 490/493] random: cleanup fractional entropy shift constants The entropy estimator is calculated in terms of 1/8 bits, which means there are various constants where things are shifted by 3. Move these into our pool info enum with the other relevant constants. While we're at it, move an English assertion about sizes into a proper BUILD_BUG_ON so that the compiler can ensure this invariant. Reviewed-by: Dominik Brodowski Signed-off-by: Jason A. Donenfeld --- drivers/char/random.c | 28 +++++++++++++--------------- 1 file changed, 13 insertions(+), 15 deletions(-) diff --git a/drivers/char/random.c b/drivers/char/random.c index 3b7e1ebfd30d..ff672be101fc 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -358,16 +358,6 @@ /* #define ADD_INTERRUPT_BENCH */ -/* - * To allow fractional bits to be tracked, the entropy_count field is - * denominated in units of 1/8th bits. - * - * 2*(POOL_ENTROPY_SHIFT + poolbitshift) must <= 31, or the multiply in - * credit_entropy_bits() needs to be 64 bits wide. - */ -#define POOL_ENTROPY_SHIFT 3 -#define POOL_ENTROPY_BITS() (input_pool.entropy_count >> POOL_ENTROPY_SHIFT) - /* * If the entropy count falls under this number of bits, then we * should wake up processes which are selecting or polling on write @@ -425,8 +415,13 @@ enum poolinfo { POOL_WORDMASK = POOL_WORDS - 1, POOL_BYTES = POOL_WORDS * sizeof(u32), POOL_BITS = POOL_BYTES * 8, - POOL_BITSHIFT = ilog2(POOL_WORDS) + 5, - POOL_FRACBITS = POOL_WORDS << (POOL_ENTROPY_SHIFT + 5), + POOL_BITSHIFT = ilog2(POOL_BITS), + + /* To allow fractional bits to be tracked, the entropy_count field is + * denominated in units of 1/8th bits. */ + POOL_ENTROPY_SHIFT = 3, +#define POOL_ENTROPY_BITS() (input_pool.entropy_count >> POOL_ENTROPY_SHIFT) + POOL_FRACBITS = POOL_BITS << POOL_ENTROPY_SHIFT, /* x^128 + x^104 + x^76 + x^51 +x^25 + x + 1 */ POOL_TAP1 = 104, @@ -652,6 +647,9 @@ static void credit_entropy_bits(int nbits) int entropy_count, entropy_bits, orig; int nfrac = nbits << POOL_ENTROPY_SHIFT; + /* Ensure that the multiplication can avoid being 64 bits wide. */ + BUILD_BUG_ON(2 * (POOL_ENTROPY_SHIFT + POOL_BITSHIFT) > 31); + if (!nbits) return; @@ -687,13 +685,13 @@ retry: /* The +2 corresponds to the /4 in the denominator */ do { - unsigned int anfrac = min(pnfrac, POOL_FRACBITS/2); + unsigned int anfrac = min(pnfrac, POOL_FRACBITS / 2); unsigned int add = - ((POOL_FRACBITS - entropy_count)*anfrac*3) >> s; + ((POOL_FRACBITS - entropy_count) * anfrac * 3) >> s; entropy_count += add; pnfrac -= anfrac; - } while (unlikely(entropy_count < POOL_FRACBITS-2 && pnfrac)); + } while (unlikely(entropy_count < POOL_FRACBITS - 2 && pnfrac)); } if (WARN_ON(entropy_count < 0)) { From 6c0eace6e1499712583b6ee62d95161e8b3449f5 Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Sat, 15 Jan 2022 14:40:04 +0100 Subject: [PATCH 491/493] random: access input_pool_data directly rather than through pointer This gets rid of another abstraction we no longer need. It would be nice if we could instead make pool an array rather than a pointer, but the latent entropy plugin won't be able to do its magic in that case. So instead we put all accesses to the input pool's actual data through the input_pool_data array directly. Reviewed-by: Dominik Brodowski Signed-off-by: Jason A. Donenfeld --- drivers/char/random.c | 21 ++++++++------------- 1 file changed, 8 insertions(+), 13 deletions(-) diff --git a/drivers/char/random.c b/drivers/char/random.c index ff672be101fc..6e4218f78260 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -496,17 +496,12 @@ MODULE_PARM_DESC(ratelimit_disable, "Disable random ratelimit suppression"); static u32 input_pool_data[POOL_WORDS] __latent_entropy; static struct { - /* read-only data: */ - u32 *pool; - - /* read-write data: */ spinlock_t lock; u16 add_ptr; u16 input_rotate; int entropy_count; } input_pool = { .lock = __SPIN_LOCK_UNLOCKED(input_pool.lock), - .pool = input_pool_data }; static ssize_t extract_entropy(void *buf, size_t nbytes, int min); @@ -544,15 +539,15 @@ static void _mix_pool_bytes(const void *in, int nbytes) i = (i - 1) & POOL_WORDMASK; /* XOR in the various taps */ - w ^= input_pool.pool[i]; - w ^= input_pool.pool[(i + POOL_TAP1) & POOL_WORDMASK]; - w ^= input_pool.pool[(i + POOL_TAP2) & POOL_WORDMASK]; - w ^= input_pool.pool[(i + POOL_TAP3) & POOL_WORDMASK]; - w ^= input_pool.pool[(i + POOL_TAP4) & POOL_WORDMASK]; - w ^= input_pool.pool[(i + POOL_TAP5) & POOL_WORDMASK]; + w ^= input_pool_data[i]; + w ^= input_pool_data[(i + POOL_TAP1) & POOL_WORDMASK]; + w ^= input_pool_data[(i + POOL_TAP2) & POOL_WORDMASK]; + w ^= input_pool_data[(i + POOL_TAP3) & POOL_WORDMASK]; + w ^= input_pool_data[(i + POOL_TAP4) & POOL_WORDMASK]; + w ^= input_pool_data[(i + POOL_TAP5) & POOL_WORDMASK]; /* Mix the result back in with a twist */ - input_pool.pool[i] = (w >> 3) ^ twist_table[w & 7]; + input_pool_data[i] = (w >> 3) ^ twist_table[w & 7]; /* * Normally, we add 7 bits of rotation to the pool. @@ -1369,7 +1364,7 @@ static void extract_buf(u8 *out) /* Generate a hash across the pool */ spin_lock_irqsave(&input_pool.lock, flags); - blake2s_update(&state, (const u8 *)input_pool.pool, POOL_BYTES); + blake2s_update(&state, (const u8 *)input_pool_data, POOL_BYTES); blake2s_final(&state, hash); /* final zeros out state */ /* From 248045b8dea5a32ddc0aa44193d6bc70c4b9cd8e Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Sat, 15 Jan 2022 14:57:22 +0100 Subject: [PATCH 492/493] random: selectively clang-format where it makes sense This is an old driver that has seen a lot of different eras of kernel coding style. In an effort to make it easier to code for, unify the coding style around the current norm, by accepting some of -- but certainly not all of -- the suggestions from clang-format. This should remove ambiguity in coding style, especially with regards to spacing, when code is being changed or amended. Consequently it also makes code review easier on the eyes, following one uniform style rather than several. Reviewed-by: Dominik Brodowski Signed-off-by: Jason A. Donenfeld --- drivers/char/random.c | 209 ++++++++++++++++++++---------------------- 1 file changed, 99 insertions(+), 110 deletions(-) diff --git a/drivers/char/random.c b/drivers/char/random.c index 6e4218f78260..1f08ce052c54 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -124,7 +124,7 @@ * * The primary kernel interface is * - * void get_random_bytes(void *buf, int nbytes); + * void get_random_bytes(void *buf, int nbytes); * * This interface will return the requested number of random bytes, * and place it in the requested buffer. This is equivalent to a @@ -132,10 +132,10 @@ * * For less critical applications, there are the functions: * - * u32 get_random_u32() - * u64 get_random_u64() - * unsigned int get_random_int() - * unsigned long get_random_long() + * u32 get_random_u32() + * u64 get_random_u64() + * unsigned int get_random_int() + * unsigned long get_random_long() * * These are produced by a cryptographic RNG seeded from get_random_bytes, * and so do not deplete the entropy pool as much. These are recommended @@ -197,10 +197,10 @@ * from the devices are: * * void add_device_randomness(const void *buf, unsigned int size); - * void add_input_randomness(unsigned int type, unsigned int code, + * void add_input_randomness(unsigned int type, unsigned int code, * unsigned int value); * void add_interrupt_randomness(int irq); - * void add_disk_randomness(struct gendisk *disk); + * void add_disk_randomness(struct gendisk *disk); * void add_hwgenerator_randomness(const char *buffer, size_t count, * size_t entropy); * void add_bootloader_randomness(const void *buf, unsigned int size); @@ -296,8 +296,8 @@ * /dev/random and /dev/urandom created already, they can be created * by using the commands: * - * mknod /dev/random c 1 8 - * mknod /dev/urandom c 1 9 + * mknod /dev/random c 1 8 + * mknod /dev/urandom c 1 9 * * Acknowledgements: * ================= @@ -443,9 +443,9 @@ static DEFINE_SPINLOCK(random_ready_list_lock); static LIST_HEAD(random_ready_list); struct crng_state { - u32 state[16]; - unsigned long init_time; - spinlock_t lock; + u32 state[16]; + unsigned long init_time; + spinlock_t lock; }; static struct crng_state primary_crng = { @@ -469,7 +469,7 @@ static bool crng_need_final_init = false; #define crng_ready() (likely(crng_init > 1)) static int crng_init_cnt = 0; static unsigned long crng_global_init_time = 0; -#define CRNG_INIT_CNT_THRESH (2*CHACHA_KEY_SIZE) +#define CRNG_INIT_CNT_THRESH (2 * CHACHA_KEY_SIZE) static void _extract_crng(struct crng_state *crng, u8 out[CHACHA_BLOCK_SIZE]); static void _crng_backtrack_protect(struct crng_state *crng, u8 tmp[CHACHA_BLOCK_SIZE], int used); @@ -509,7 +509,7 @@ static ssize_t _extract_entropy(void *buf, size_t nbytes); static void crng_reseed(struct crng_state *crng, bool use_input_pool); -static u32 const twist_table[8] = { +static const u32 twist_table[8] = { 0x00000000, 0x3b6e20c8, 0x76dc4190, 0x4db26158, 0xedb88320, 0xd6d6a3e8, 0x9b64c2b0, 0xa00ae278 }; @@ -579,10 +579,10 @@ static void mix_pool_bytes(const void *in, int nbytes) } struct fast_pool { - u32 pool[4]; - unsigned long last; - u16 reg_idx; - u8 count; + u32 pool[4]; + unsigned long last; + u16 reg_idx; + u8 count; }; /* @@ -710,7 +710,7 @@ static int credit_entropy_bits_safe(int nbits) return -EINVAL; /* Cap the value to avoid overflows */ - nbits = min(nbits, POOL_BITS); + nbits = min(nbits, POOL_BITS); credit_entropy_bits(nbits); return 0; @@ -722,7 +722,7 @@ static int credit_entropy_bits_safe(int nbits) * *********************************************************************/ -#define CRNG_RESEED_INTERVAL (300*HZ) +#define CRNG_RESEED_INTERVAL (300 * HZ) static DECLARE_WAIT_QUEUE_HEAD(crng_init_wait); @@ -746,9 +746,9 @@ early_param("random.trust_cpu", parse_trust_cpu); static bool crng_init_try_arch(struct crng_state *crng) { - int i; - bool arch_init = true; - unsigned long rv; + int i; + bool arch_init = true; + unsigned long rv; for (i = 4; i < 16; i++) { if (!arch_get_random_seed_long(&rv) && @@ -764,9 +764,9 @@ static bool crng_init_try_arch(struct crng_state *crng) static bool __init crng_init_try_arch_early(struct crng_state *crng) { - int i; - bool arch_init = true; - unsigned long rv; + int i; + bool arch_init = true; + unsigned long rv; for (i = 4; i < 16; i++) { if (!arch_get_random_seed_long_early(&rv) && @@ -836,7 +836,7 @@ static void do_numa_crng_init(struct work_struct *work) struct crng_state *crng; struct crng_state **pool; - pool = kcalloc(nr_node_ids, sizeof(*pool), GFP_KERNEL|__GFP_NOFAIL); + pool = kcalloc(nr_node_ids, sizeof(*pool), GFP_KERNEL | __GFP_NOFAIL); for_each_online_node(i) { crng = kmalloc_node(sizeof(struct crng_state), GFP_KERNEL | __GFP_NOFAIL, i); @@ -892,7 +892,7 @@ static size_t crng_fast_load(const u8 *cp, size_t len) spin_unlock_irqrestore(&primary_crng.lock, flags); return 0; } - p = (u8 *) &primary_crng.state[4]; + p = (u8 *)&primary_crng.state[4]; while (len > 0 && crng_init_cnt < CRNG_INIT_CNT_THRESH) { p[crng_init_cnt % CHACHA_KEY_SIZE] ^= *cp; cp++; crng_init_cnt++; len--; ret++; @@ -922,12 +922,12 @@ static size_t crng_fast_load(const u8 *cp, size_t len) */ static int crng_slow_load(const u8 *cp, size_t len) { - unsigned long flags; - static u8 lfsr = 1; - u8 tmp; - unsigned int i, max = CHACHA_KEY_SIZE; - const u8 * src_buf = cp; - u8 * dest_buf = (u8 *) &primary_crng.state[4]; + unsigned long flags; + static u8 lfsr = 1; + u8 tmp; + unsigned int i, max = CHACHA_KEY_SIZE; + const u8 *src_buf = cp; + u8 *dest_buf = (u8 *)&primary_crng.state[4]; if (!spin_trylock_irqsave(&primary_crng.lock, flags)) return 0; @@ -938,7 +938,7 @@ static int crng_slow_load(const u8 *cp, size_t len) if (len > max) max = len; - for (i = 0; i < max ; i++) { + for (i = 0; i < max; i++) { tmp = lfsr; lfsr >>= 1; if (tmp & 1) @@ -953,11 +953,11 @@ static int crng_slow_load(const u8 *cp, size_t len) static void crng_reseed(struct crng_state *crng, bool use_input_pool) { - unsigned long flags; - int i, num; + unsigned long flags; + int i, num; union { - u8 block[CHACHA_BLOCK_SIZE]; - u32 key[8]; + u8 block[CHACHA_BLOCK_SIZE]; + u32 key[8]; } buf; if (use_input_pool) { @@ -971,11 +971,11 @@ static void crng_reseed(struct crng_state *crng, bool use_input_pool) } spin_lock_irqsave(&crng->lock, flags); for (i = 0; i < 8; i++) { - unsigned long rv; + unsigned long rv; if (!arch_get_random_seed_long(&rv) && !arch_get_random_long(&rv)) rv = random_get_entropy(); - crng->state[i+4] ^= buf.key[i] ^ rv; + crng->state[i + 4] ^= buf.key[i] ^ rv; } memzero_explicit(&buf, sizeof(buf)); WRITE_ONCE(crng->init_time, jiffies); @@ -983,8 +983,7 @@ static void crng_reseed(struct crng_state *crng, bool use_input_pool) crng_finalize_init(crng); } -static void _extract_crng(struct crng_state *crng, - u8 out[CHACHA_BLOCK_SIZE]) +static void _extract_crng(struct crng_state *crng, u8 out[CHACHA_BLOCK_SIZE]) { unsigned long flags, init_time; @@ -1013,9 +1012,9 @@ static void extract_crng(u8 out[CHACHA_BLOCK_SIZE]) static void _crng_backtrack_protect(struct crng_state *crng, u8 tmp[CHACHA_BLOCK_SIZE], int used) { - unsigned long flags; - u32 *s, *d; - int i; + unsigned long flags; + u32 *s, *d; + int i; used = round_up(used, sizeof(u32)); if (used + CHACHA_KEY_SIZE > CHACHA_BLOCK_SIZE) { @@ -1023,9 +1022,9 @@ static void _crng_backtrack_protect(struct crng_state *crng, used = 0; } spin_lock_irqsave(&crng->lock, flags); - s = (u32 *) &tmp[used]; + s = (u32 *)&tmp[used]; d = &crng->state[4]; - for (i=0; i < 8; i++) + for (i = 0; i < 8; i++) *d++ ^= *s++; spin_unlock_irqrestore(&crng->lock, flags); } @@ -1070,7 +1069,6 @@ static ssize_t extract_crng_user(void __user *buf, size_t nbytes) return ret; } - /********************************************************************* * * Entropy input management @@ -1165,11 +1163,11 @@ static void add_timer_randomness(struct timer_rand_state *state, unsigned num) * Round down by 1 bit on general principles, * and limit entropy estimate to 12 bits. */ - credit_entropy_bits(min_t(int, fls(delta>>1), 11)); + credit_entropy_bits(min_t(int, fls(delta >> 1), 11)); } void add_input_randomness(unsigned int type, unsigned int code, - unsigned int value) + unsigned int value) { static unsigned char last_value; @@ -1189,19 +1187,19 @@ static DEFINE_PER_CPU(struct fast_pool, irq_randomness); #ifdef ADD_INTERRUPT_BENCH static unsigned long avg_cycles, avg_deviation; -#define AVG_SHIFT 8 /* Exponential average factor k=1/256 */ -#define FIXED_1_2 (1 << (AVG_SHIFT-1)) +#define AVG_SHIFT 8 /* Exponential average factor k=1/256 */ +#define FIXED_1_2 (1 << (AVG_SHIFT - 1)) static void add_interrupt_bench(cycles_t start) { - long delta = random_get_entropy() - start; + long delta = random_get_entropy() - start; - /* Use a weighted moving average */ - delta = delta - ((avg_cycles + FIXED_1_2) >> AVG_SHIFT); - avg_cycles += delta; - /* And average deviation */ - delta = abs(delta) - ((avg_deviation + FIXED_1_2) >> AVG_SHIFT); - avg_deviation += delta; + /* Use a weighted moving average */ + delta = delta - ((avg_cycles + FIXED_1_2) >> AVG_SHIFT); + avg_cycles += delta; + /* And average deviation */ + delta = abs(delta) - ((avg_deviation + FIXED_1_2) >> AVG_SHIFT); + avg_deviation += delta; } #else #define add_interrupt_bench(x) @@ -1209,7 +1207,7 @@ static void add_interrupt_bench(cycles_t start) static u32 get_reg(struct fast_pool *f, struct pt_regs *regs) { - u32 *ptr = (u32 *) regs; + u32 *ptr = (u32 *)regs; unsigned int idx; if (regs == NULL) @@ -1224,12 +1222,12 @@ static u32 get_reg(struct fast_pool *f, struct pt_regs *regs) void add_interrupt_randomness(int irq) { - struct fast_pool *fast_pool = this_cpu_ptr(&irq_randomness); - struct pt_regs *regs = get_irq_regs(); - unsigned long now = jiffies; - cycles_t cycles = random_get_entropy(); - u32 c_high, j_high; - u64 ip; + struct fast_pool *fast_pool = this_cpu_ptr(&irq_randomness); + struct pt_regs *regs = get_irq_regs(); + unsigned long now = jiffies; + cycles_t cycles = random_get_entropy(); + u32 c_high, j_high; + u64 ip; if (cycles == 0) cycles = get_reg(fast_pool, regs); @@ -1239,8 +1237,8 @@ void add_interrupt_randomness(int irq) fast_pool->pool[1] ^= now ^ c_high; ip = regs ? instruction_pointer(regs) : _RET_IP_; fast_pool->pool[2] ^= ip; - fast_pool->pool[3] ^= (sizeof(ip) > 4) ? ip >> 32 : - get_reg(fast_pool, regs); + fast_pool->pool[3] ^= + (sizeof(ip) > 4) ? ip >> 32 : get_reg(fast_pool, regs); fast_mix(fast_pool); add_interrupt_bench(cycles); @@ -1254,8 +1252,7 @@ void add_interrupt_randomness(int irq) return; } - if ((fast_pool->count < 64) && - !time_after(now, fast_pool->last + HZ)) + if ((fast_pool->count < 64) && !time_after(now, fast_pool->last + HZ)) return; if (!spin_trylock(&input_pool.lock)) @@ -1319,7 +1316,7 @@ retry: entropy_count = 0; } nfrac = ibytes << (POOL_ENTROPY_SHIFT + 3); - if ((size_t) entropy_count > nfrac) + if ((size_t)entropy_count > nfrac) entropy_count -= nfrac; else entropy_count = 0; @@ -1422,10 +1419,9 @@ static ssize_t extract_entropy(void *buf, size_t nbytes, int min) } #define warn_unseeded_randomness(previous) \ - _warn_unseeded_randomness(__func__, (void *) _RET_IP_, (previous)) + _warn_unseeded_randomness(__func__, (void *)_RET_IP_, (previous)) -static void _warn_unseeded_randomness(const char *func_name, void *caller, - void **previous) +static void _warn_unseeded_randomness(const char *func_name, void *caller, void **previous) { #ifdef CONFIG_WARN_ALL_UNSEEDED_RANDOM const bool print_once = false; @@ -1433,8 +1429,7 @@ static void _warn_unseeded_randomness(const char *func_name, void *caller, static bool print_once __read_mostly; #endif - if (print_once || - crng_ready() || + if (print_once || crng_ready() || (previous && (caller == READ_ONCE(*previous)))) return; WRITE_ONCE(*previous, caller); @@ -1442,9 +1437,8 @@ static void _warn_unseeded_randomness(const char *func_name, void *caller, print_once = true; #endif if (__ratelimit(&unseeded_warning)) - printk_deferred(KERN_NOTICE "random: %s called from %pS " - "with crng_init=%d\n", func_name, caller, - crng_init); + printk_deferred(KERN_NOTICE "random: %s called from %pS with crng_init=%d\n", + func_name, caller, crng_init); } /* @@ -1487,7 +1481,6 @@ void get_random_bytes(void *buf, int nbytes) } EXPORT_SYMBOL(get_random_bytes); - /* * Each time the timer fires, we expect that we got an unpredictable * jump in the cycle counter. Even if the timer is running on another @@ -1526,7 +1519,7 @@ static void try_to_generate_entropy(void) timer_setup_on_stack(&stack.timer, entropy_timer, 0); while (!crng_ready()) { if (!timer_pending(&stack.timer)) - mod_timer(&stack.timer, jiffies+1); + mod_timer(&stack.timer, jiffies + 1); mix_pool_bytes(&stack.now, sizeof(stack.now)); schedule(); stack.now = random_get_entropy(); @@ -1736,9 +1729,8 @@ void rand_initialize_disk(struct gendisk *disk) } #endif -static ssize_t -urandom_read_nowarn(struct file *file, char __user *buf, size_t nbytes, - loff_t *ppos) +static ssize_t urandom_read_nowarn(struct file *file, char __user *buf, + size_t nbytes, loff_t *ppos) { int ret; @@ -1748,8 +1740,8 @@ urandom_read_nowarn(struct file *file, char __user *buf, size_t nbytes, return ret; } -static ssize_t -urandom_read(struct file *file, char __user *buf, size_t nbytes, loff_t *ppos) +static ssize_t urandom_read(struct file *file, char __user *buf, size_t nbytes, + loff_t *ppos) { static int maxwarn = 10; @@ -1763,8 +1755,8 @@ urandom_read(struct file *file, char __user *buf, size_t nbytes, loff_t *ppos) return urandom_read_nowarn(file, buf, nbytes, ppos); } -static ssize_t -random_read(struct file *file, char __user *buf, size_t nbytes, loff_t *ppos) +static ssize_t random_read(struct file *file, char __user *buf, size_t nbytes, + loff_t *ppos) { int ret; @@ -1774,8 +1766,7 @@ random_read(struct file *file, char __user *buf, size_t nbytes, loff_t *ppos) return urandom_read_nowarn(file, buf, nbytes, ppos); } -static __poll_t -random_poll(struct file *file, poll_table * wait) +static __poll_t random_poll(struct file *file, poll_table *wait) { __poll_t mask; @@ -1789,8 +1780,7 @@ random_poll(struct file *file, poll_table * wait) return mask; } -static int -write_pool(const char __user *buffer, size_t count) +static int write_pool(const char __user *buffer, size_t count) { size_t bytes; u32 t, buf[16]; @@ -1892,9 +1882,9 @@ static int random_fasync(int fd, struct file *filp, int on) } const struct file_operations random_fops = { - .read = random_read, + .read = random_read, .write = random_write, - .poll = random_poll, + .poll = random_poll, .unlocked_ioctl = random_ioctl, .compat_ioctl = compat_ptr_ioctl, .fasync = random_fasync, @@ -1902,7 +1892,7 @@ const struct file_operations random_fops = { }; const struct file_operations urandom_fops = { - .read = urandom_read, + .read = urandom_read, .write = random_write, .unlocked_ioctl = random_ioctl, .compat_ioctl = compat_ptr_ioctl, @@ -1910,19 +1900,19 @@ const struct file_operations urandom_fops = { .llseek = noop_llseek, }; -SYSCALL_DEFINE3(getrandom, char __user *, buf, size_t, count, - unsigned int, flags) +SYSCALL_DEFINE3(getrandom, char __user *, buf, size_t, count, unsigned int, + flags) { int ret; - if (flags & ~(GRND_NONBLOCK|GRND_RANDOM|GRND_INSECURE)) + if (flags & ~(GRND_NONBLOCK | GRND_RANDOM | GRND_INSECURE)) return -EINVAL; /* * Requesting insecure and blocking randomness at the same time makes * no sense. */ - if ((flags & (GRND_INSECURE|GRND_RANDOM)) == (GRND_INSECURE|GRND_RANDOM)) + if ((flags & (GRND_INSECURE | GRND_RANDOM)) == (GRND_INSECURE | GRND_RANDOM)) return -EINVAL; if (count > INT_MAX) @@ -1962,8 +1952,8 @@ static char sysctl_bootid[16]; * returned as an ASCII string in the standard UUID format; if via the * sysctl system call, as 16 bytes of binary data. */ -static int proc_do_uuid(struct ctl_table *table, int write, - void *buffer, size_t *lenp, loff_t *ppos) +static int proc_do_uuid(struct ctl_table *table, int write, void *buffer, + size_t *lenp, loff_t *ppos) { struct ctl_table fake_table; unsigned char buf[64], tmp_uuid[16], *uuid; @@ -1992,8 +1982,8 @@ static int proc_do_uuid(struct ctl_table *table, int write, /* * Return entropy available scaled to integral bits */ -static int proc_do_entropy(struct ctl_table *table, int write, - void *buffer, size_t *lenp, loff_t *ppos) +static int proc_do_entropy(struct ctl_table *table, int write, void *buffer, + size_t *lenp, loff_t *ppos) { struct ctl_table fake_table; int entropy_count; @@ -2070,7 +2060,7 @@ struct ctl_table random_table[] = { #endif { } }; -#endif /* CONFIG_SYSCTL */ +#endif /* CONFIG_SYSCTL */ struct batched_entropy { union { @@ -2090,7 +2080,7 @@ struct batched_entropy { * point prior. */ static DEFINE_PER_CPU(struct batched_entropy, batched_entropy_u64) = { - .batch_lock = __SPIN_LOCK_UNLOCKED(batched_entropy_u64.lock), + .batch_lock = __SPIN_LOCK_UNLOCKED(batched_entropy_u64.lock), }; u64 get_random_u64(void) @@ -2115,7 +2105,7 @@ u64 get_random_u64(void) EXPORT_SYMBOL(get_random_u64); static DEFINE_PER_CPU(struct batched_entropy, batched_entropy_u32) = { - .batch_lock = __SPIN_LOCK_UNLOCKED(batched_entropy_u32.lock), + .batch_lock = __SPIN_LOCK_UNLOCKED(batched_entropy_u32.lock), }; u32 get_random_u32(void) { @@ -2147,7 +2137,7 @@ static void invalidate_batched_entropy(void) int cpu; unsigned long flags; - for_each_possible_cpu (cpu) { + for_each_possible_cpu(cpu) { struct batched_entropy *batched_entropy; batched_entropy = per_cpu_ptr(&batched_entropy_u32, cpu); @@ -2176,8 +2166,7 @@ static void invalidate_batched_entropy(void) * Return: A page aligned address within [start, start + range). On error, * @start is returned. */ -unsigned long -randomize_page(unsigned long start, unsigned long range) +unsigned long randomize_page(unsigned long start, unsigned long range) { if (!PAGE_ALIGNED(start)) { range -= PAGE_ALIGN(start) - start; From a254a0e4093fce8c832414a83940736067eed515 Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Mon, 17 Jan 2022 18:43:02 +0100 Subject: [PATCH 493/493] random: simplify arithmetic function flow in account() Now that have_bytes is never modified, we can simplify this function. First, we move the check for negative entropy_count to be first. That ensures that subsequent reads of this will be non-negative. Then, have_bytes and ibytes can be folded into their one use site in the min_t() function. Suggested-by: Dominik Brodowski Reviewed-by: Dominik Brodowski Signed-off-by: Jason A. Donenfeld --- drivers/char/random.c | 17 ++++++----------- 1 file changed, 6 insertions(+), 11 deletions(-) diff --git a/drivers/char/random.c b/drivers/char/random.c index 1f08ce052c54..b411182df6f6 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -1293,7 +1293,7 @@ EXPORT_SYMBOL_GPL(add_disk_randomness); */ static size_t account(size_t nbytes, int min) { - int entropy_count, orig, have_bytes; + int entropy_count, orig; size_t ibytes, nfrac; BUG_ON(input_pool.entropy_count > POOL_FRACBITS); @@ -1301,20 +1301,15 @@ static size_t account(size_t nbytes, int min) /* Can we pull enough? */ retry: entropy_count = orig = READ_ONCE(input_pool.entropy_count); - ibytes = nbytes; - /* never pull more than available */ - have_bytes = entropy_count >> (POOL_ENTROPY_SHIFT + 3); - - if (have_bytes < 0) - have_bytes = 0; - ibytes = min_t(size_t, ibytes, have_bytes); - if (ibytes < min) - ibytes = 0; - if (WARN_ON(entropy_count < 0)) { pr_warn("negative entropy count: count %d\n", entropy_count); entropy_count = 0; } + + /* never pull more than available */ + ibytes = min_t(size_t, nbytes, entropy_count >> (POOL_ENTROPY_SHIFT + 3)); + if (ibytes < min) + ibytes = 0; nfrac = ibytes << (POOL_ENTROPY_SHIFT + 3); if ((size_t)entropy_count > nfrac) entropy_count -= nfrac;