From 4843d38fe817f702596c3156cce262e2c5496af7 Mon Sep 17 00:00:00 2001 From: Tao Huang Date: Mon, 13 Mar 2023 20:16:54 +0800 Subject: [PATCH 01/79] arm64: rockchip_defconfig: Disable A510 ERRATUM 2051678 -CONFIG_ARM64_ERRATUM_2051678 Signed-off-by: Tao Huang Change-Id: I95fd2c9007c1e14cd90f5df16eb655b72fced901 --- arch/arm64/configs/rockchip_defconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm64/configs/rockchip_defconfig b/arch/arm64/configs/rockchip_defconfig index 9ae06bf3b982..c8077aceb851 100644 --- a/arch/arm64/configs/rockchip_defconfig +++ b/arch/arm64/configs/rockchip_defconfig @@ -56,6 +56,7 @@ CONFIG_ARCH_ROCKCHIP=y # CONFIG_ARM64_ERRATUM_1463225 is not set # CONFIG_ARM64_ERRATUM_1542419 is not set # CONFIG_ARM64_ERRATUM_1508412 is not set +# CONFIG_ARM64_ERRATUM_2051678 is not set # CONFIG_ARM64_ERRATUM_2054223 is not set # CONFIG_ARM64_ERRATUM_2067961 is not set # CONFIG_CAVIUM_ERRATUM_22375 is not set From 9f9f111a7f0a114665a8bd927dfe46f3525beb6b Mon Sep 17 00:00:00 2001 From: Tao Huang Date: Mon, 13 Mar 2023 21:06:06 +0800 Subject: [PATCH 02/79] arm64: rockchip_linux_defconfig: Disable A510 ERRATUM 2051678 -CONFIG_ARM64_ERRATUM_2051678 Signed-off-by: Tao Huang Change-Id: Icffee7d1855f0feb142905dfec39b1413a1f8e8d --- arch/arm64/configs/rockchip_linux_defconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm64/configs/rockchip_linux_defconfig b/arch/arm64/configs/rockchip_linux_defconfig index 63dd66ae2566..81a87c158bfb 100644 --- a/arch/arm64/configs/rockchip_linux_defconfig +++ b/arch/arm64/configs/rockchip_linux_defconfig @@ -34,6 +34,7 @@ CONFIG_ARCH_ROCKCHIP=y # CONFIG_ARM64_ERRATUM_1463225 is not set # CONFIG_ARM64_ERRATUM_1542419 is not set # CONFIG_ARM64_ERRATUM_1508412 is not set +# CONFIG_ARM64_ERRATUM_2051678 is not set # CONFIG_ARM64_ERRATUM_2054223 is not set # CONFIG_ARM64_ERRATUM_2067961 is not set # CONFIG_CAVIUM_ERRATUM_22375 is not set From ee1d33de8b79096f5f407ee6762b6be63eb30fcd Mon Sep 17 00:00:00 2001 
From: Tao Huang Date: Tue, 14 Mar 2023 16:17:54 +0800 Subject: [PATCH 03/79] ARM: rv1106_defconfig: Disable CONFIG_RANDOM_TRUST_BOOTLOADER Fixes: 4603a37f6eae ("random: credit cpu and bootloader seeds by default") Signed-off-by: Tao Huang Change-Id: I911f729cb469cba0438524ea49ccf11f96a5b941 --- arch/arm/configs/rv1106_defconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm/configs/rv1106_defconfig b/arch/arm/configs/rv1106_defconfig index 10a3df77a9bf..bf50891eec94 100644 --- a/arch/arm/configs/rv1106_defconfig +++ b/arch/arm/configs/rv1106_defconfig @@ -116,6 +116,7 @@ CONFIG_SERIAL_8250_RUNTIME_UARTS=6 CONFIG_SERIAL_8250_DW=y CONFIG_HW_RANDOM=y CONFIG_HW_RANDOM_ROCKCHIP=y +# CONFIG_RANDOM_TRUST_BOOTLOADER is not set CONFIG_I2C=y CONFIG_I2C_CHARDEV=y CONFIG_I2C_RK3X=y From a60b3147328a80a3d456712ec2552bae41337816 Mon Sep 17 00:00:00 2001 From: Tao Huang Date: Tue, 14 Mar 2023 16:21:10 +0800 Subject: [PATCH 04/79] ARM: rv1106_defconfig: Set CONFIG_INET_TABLE_PERTURB_ORDER=8 On embedded systems with little memory and no relevant security concerns, it is beneficial to reduce the size of the table. Reducing the size from 2^16 to 2^8 saves 255 KiB of kernel RAM. 
Fixes: 7e8eaa939eea ("tcp: configurable source port perturb table size") Signed-off-by: Tao Huang Change-Id: I5da1509bbc7b539c367b696bccc67722863cafbe --- arch/arm/configs/rv1106_defconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm/configs/rv1106_defconfig b/arch/arm/configs/rv1106_defconfig index bf50891eec94..8b85fa1083e5 100644 --- a/arch/arm/configs/rv1106_defconfig +++ b/arch/arm/configs/rv1106_defconfig @@ -53,6 +53,7 @@ CONFIG_NET=y CONFIG_PACKET=y CONFIG_UNIX=y CONFIG_INET=y +CONFIG_INET_TABLE_PERTURB_ORDER=8 # CONFIG_INET_DIAG is not set # CONFIG_IPV6 is not set # CONFIG_WIRELESS is not set From c05a83fed488d93733f402f949dc2655771476d2 Mon Sep 17 00:00:00 2001 From: Sandy Huang Date: Wed, 15 Mar 2023 14:40:03 +0800 Subject: [PATCH 05/79] drm/rockchip: vop3: fix esmart area[3] register offset error Signed-off-by: Sandy Huang Change-Id: Ic87252cecfeb161d6bea0572ffc570003350dc86 --- drivers/gpu/drm/rockchip/rockchip_vop2_reg.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/rockchip/rockchip_vop2_reg.c b/drivers/gpu/drm/rockchip/rockchip_vop2_reg.c index 8079a32dca31..05d9f1e052ee 100644 --- a/drivers/gpu/drm/rockchip/rockchip_vop2_reg.c +++ b/drivers/gpu/drm/rockchip/rockchip_vop2_reg.c @@ -2196,9 +2196,9 @@ static const struct vop2_scl_regs rk3568_area3_scl = { .vsd_yrgb_gt4 = VOP_REG(RK3568_ESMART0_REGION3_CTRL, 0x1, 9), .vsd_cbcr_gt2 = VOP_REG(RK3568_ESMART0_REGION3_CTRL, 0x1, 10), .vsd_cbcr_gt4 = VOP_REG(RK3568_ESMART0_REGION3_CTRL, 0x1, 11), - .xavg_en = VOP_REG(RK3568_ESMART0_REGION2_CTRL, 0x1, 20),/* supported from vop3 */ - .xgt_en = VOP_REG(RK3568_ESMART0_REGION2_CTRL, 0x1, 21), - .xgt_mode = VOP_REG(RK3568_ESMART0_REGION2_CTRL, 0x3, 22), + .xavg_en = VOP_REG(RK3568_ESMART0_REGION3_CTRL, 0x1, 20),/* supported from vop3 */ + .xgt_en = VOP_REG(RK3568_ESMART0_REGION3_CTRL, 0x1, 21), + .xgt_mode = VOP_REG(RK3568_ESMART0_REGION3_CTRL, 0x3, 22), }; static const struct vop2_win_regs rk3568_area1_data 
= { From b864542b3f30a3147692bd1be710501bfb84a911 Mon Sep 17 00:00:00 2001 From: Zefa Chen Date: Fri, 10 Mar 2023 21:28:32 +0800 Subject: [PATCH 06/79] media: rockchip: vicap: fix warning of vb2 cancel or done Signed-off-by: Zefa Chen Change-Id: I51079641446159444fd6dc2223e5a3f9062001fa --- drivers/media/platform/rockchip/cif/capture.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/media/platform/rockchip/cif/capture.c b/drivers/media/platform/rockchip/cif/capture.c index d9f29f6a6dc7..d6055b0c8828 100644 --- a/drivers/media/platform/rockchip/cif/capture.c +++ b/drivers/media/platform/rockchip/cif/capture.c @@ -4467,6 +4467,14 @@ void rkcif_do_stop_stream(struct rkcif_stream *stream, vb2_buffer_done(&buf->vb.vb2_buf, VB2_BUF_STATE_ERROR); } INIT_LIST_HEAD(&stream->buf_head); + while (!list_empty(&stream->vb_done_list)) { + buf = list_first_entry(&stream->vb_done_list, + struct rkcif_buffer, queue); + if (buf) { + list_del(&buf->queue); + vb2_buffer_done(&buf->vb.vb2_buf, VB2_BUF_STATE_ERROR); + } + } stream->lack_buf_cnt = 0; stream->dma_en &= ~RKCIF_DMAEN_BY_VICAP; } From 25a47e3925be571f8aade77afcb3e0e72cf9c58b Mon Sep 17 00:00:00 2001 From: Jon Lin Date: Wed, 15 Mar 2023 21:15:30 +0800 Subject: [PATCH 07/79] PCI: rockchip: dw: Support deferred probe for ep Change-Id: I0f032618d97f88a0cd70ae164c3f6f6ef6494515 Signed-off-by: Xiao Ya peng Signed-off-by: Simon Xue Signed-off-by: Jon Lin --- drivers/pci/controller/dwc/pcie-dw-ep-rockchip.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/pci/controller/dwc/pcie-dw-ep-rockchip.c b/drivers/pci/controller/dwc/pcie-dw-ep-rockchip.c index 0fe8714c7b0c..b315bb97a70f 100644 --- a/drivers/pci/controller/dwc/pcie-dw-ep-rockchip.c +++ b/drivers/pci/controller/dwc/pcie-dw-ep-rockchip.c @@ -1133,9 +1133,10 @@ static struct platform_driver rk_plat_pcie_driver = { .of_match_table = rockchip_pcie_ep_of_match, .suppress_bind_attrs = true, }, + .probe = rockchip_pcie_ep_probe, }; 
-module_platform_driver_probe(rk_plat_pcie_driver, rockchip_pcie_ep_probe); +module_platform_driver(rk_plat_pcie_driver); MODULE_AUTHOR("Simon Xue "); MODULE_DESCRIPTION("RockChip PCIe Controller EP driver"); From 3f1bcfe6ec3a1774c5c9272fadf17303772fe19c Mon Sep 17 00:00:00 2001 From: Algea Cao Date: Tue, 14 Mar 2023 17:16:56 +0800 Subject: [PATCH 08/79] drm: bridge: dw-hdmi: Fixed ddc error caused by plug out hdmi when reading edid Signed-off-by: Algea Cao Change-Id: Ie220584b5ae17822170bf761f699d8897caf9975 --- drivers/gpu/drm/bridge/synopsys/dw-hdmi.c | 32 ++++++++++++++++------- 1 file changed, 23 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/bridge/synopsys/dw-hdmi.c b/drivers/gpu/drm/bridge/synopsys/dw-hdmi.c index 5b45fe44579f..990271cae22f 100644 --- a/drivers/gpu/drm/bridge/synopsys/dw-hdmi.c +++ b/drivers/gpu/drm/bridge/synopsys/dw-hdmi.c @@ -420,18 +420,14 @@ static void repo_hpd_event(struct work_struct *p_work) if (hdmi->bridge.dev) { bool change; - void *data = hdmi->plat_data->phy_data; change = drm_helper_hpd_irq_event(hdmi->bridge.dev); - if (change) { - if (hdmi->plat_data->set_ddc_io) - hdmi->plat_data->set_ddc_io(data, hdmi->hpd_state); - if (hdmi->cec_adap->devnode.registered) - cec_queue_pin_hpd_event(hdmi->cec_adap, - hdmi->hpd_state, - ktime_get()); - } + if (change && hdmi->cec_adap && + hdmi->cec_adap->devnode.registered) + cec_queue_pin_hpd_event(hdmi->cec_adap, + hdmi->hpd_state, + ktime_get()); drm_bridge_hpd_notify(&hdmi->bridge, status); } } @@ -632,7 +628,11 @@ static int dw_hdmi_i2c_read(struct dw_hdmi *hdmi, while (retry > 0) { if (!(hdmi_readb(hdmi, HDMI_PHY_STAT0) & HDMI_PHY_HPD)) { + void *data = hdmi->plat_data->phy_data; + dev_dbg(hdmi->dev, "hdmi disconnect, stop ddc read\n"); + if (hdmi->plat_data->set_ddc_io) + hdmi->plat_data->set_ddc_io(data, false); return -EPERM; } @@ -695,7 +695,11 @@ static int dw_hdmi_i2c_write(struct dw_hdmi *hdmi, while (retry > 0) { if (!(hdmi_readb(hdmi, HDMI_PHY_STAT0) & 
HDMI_PHY_HPD)) { + void *data = hdmi->plat_data->phy_data; + dev_dbg(hdmi->dev, "hdmi disconnect, stop ddc write\n"); + if (hdmi->plat_data->set_ddc_io) + hdmi->plat_data->set_ddc_io(data, false); return -EPERM; } @@ -736,6 +740,7 @@ static int dw_hdmi_i2c_xfer(struct i2c_adapter *adap, struct dw_hdmi *hdmi = i2c_get_adapdata(adap); struct dw_hdmi_i2c *i2c = hdmi->i2c; u8 addr = msgs[0].addr; + void *data = hdmi->plat_data->phy_data; int i, ret = 0; if (addr == DDC_CI_ADDR) @@ -760,6 +765,9 @@ static int dw_hdmi_i2c_xfer(struct i2c_adapter *adap, mutex_lock(&i2c->lock); + if (hdmi->plat_data->set_ddc_io) + hdmi->plat_data->set_ddc_io(data, true); + hdmi_writeb(hdmi, 0, HDMI_I2CM_SOFTRSTZ); udelay(100); @@ -3925,6 +3933,7 @@ static void dw_hdmi_bridge_atomic_disable(struct drm_bridge *bridge, struct drm_bridge_state *old_state) { struct dw_hdmi *hdmi = bridge->driver_private; + void *data = hdmi->plat_data->phy_data; mutex_lock(&hdmi->mutex); hdmi->disabled = true; @@ -3933,6 +3942,11 @@ static void dw_hdmi_bridge_atomic_disable(struct drm_bridge *bridge, dw_hdmi_update_power(hdmi); dw_hdmi_update_phy_mask(hdmi); mutex_unlock(&hdmi->mutex); + + mutex_lock(&hdmi->i2c->lock); + if (hdmi->plat_data->set_ddc_io) + hdmi->plat_data->set_ddc_io(data, false); + mutex_unlock(&hdmi->i2c->lock); } static void dw_hdmi_bridge_atomic_enable(struct drm_bridge *bridge, From 53222b084946f6f664ecb8e6a43e100382379ff4 Mon Sep 17 00:00:00 2001 From: Sugar Zhang Date: Mon, 13 Mar 2023 17:17:08 +0800 Subject: [PATCH 09/79] ASoC: hdmi-codec: Stop stream when plug state changed Workaround for HDMIIN and HDMIOUT plug-{in,out} when streaming. Actually, we should do stop stream both for HDMI_{OUT,IN} on plug-{out,in} event. but for better experience and depop stream, we optimize as follows: a) Do stop stream for HDMIIN on plug-out when streaming. because HDMIIN work as SLAVE mode, CLK lost after HDMI cable plugged out which will make stream stuck until ALSA timeout(10s). 
so, for better experience, we should stop stream at the moment. b) Do stop stream for HDMIOUT on plug-in when streaming. because HDMIOUT work as MASTER mode, there is no clk-issue like HDMIIN, but, on HDR situation, HDMI will be reconfigured which make HDMI audio configure lost, especially for NLPCM/HBR bitstream which require IEC937 packet alignment, so, for this situation, we stop stream to notify user to re-open and configure sound card and then go on streaming. Userspace should notice Error-Code(such as -EBADFD) from snd_pcm_write to do reopen. Signed-off-by: Sugar Zhang Change-Id: I85455e56f843afcdc9a5f1d5c9e85733b6732cdf --- sound/soc/codecs/hdmi-codec.c | 33 +++++++++++++++++++++++++++++++-- 1 file changed, 31 insertions(+), 2 deletions(-) diff --git a/sound/soc/codecs/hdmi-codec.c b/sound/soc/codecs/hdmi-codec.c index f0a05fd71d84..0e969297e787 100644 --- a/sound/soc/codecs/hdmi-codec.c +++ b/sound/soc/codecs/hdmi-codec.c @@ -845,9 +845,38 @@ static void plugged_cb(struct device *dev, bool plugged) hdmi_codec_jack_report(hcp, 0); memset(hcp->eld, 0, sizeof(hcp->eld)); } + mutex_lock(&hcp->lock); - if (hcp->substream && !plugged) - snd_pcm_stop(hcp->substream, SNDRV_PCM_STATE_DISCONNECTED); + if (hcp->substream) { + /* + * Workaround for HDMIIN and HDMIOUT plug-{in,out} when streaming. + * + * Actually, we should do stop stream both for HDMI_{OUT,IN} on + * plug-{out,in} event. but for better experience and depop stream, + * we optimize as follows: + * + * a) Do stop stream for HDMIIN on plug-out when streaming. + * because HDMIIN work as SLAVE mode, CLK lost after HDMI cable + * plugged out which will make stream stuck until ALSA timeout(10s). + * so, for better experience, we should stop stream at the moment. + * + * b) Do stop stream for HDMIOUT on plug-in when streaming. 
+ * because HDMIOUT work as MASTER mode, there is no clk-issue like + * HDMIIN, but, on HDR situation, HDMI will be reconfigured which + * make HDMI audio configure lost, especially for NLPCM/HBR bitstream + * which require IEC937 packet alignment, so, for this situation, + * we stop stream to notify user to re-open and configure sound card + * and then go on streaming. + */ + int stream = hcp->substream->stream; + + if (stream == SNDRV_PCM_STREAM_PLAYBACK && plugged) + snd_pcm_stop(hcp->substream, SNDRV_PCM_STATE_SETUP); + else if (stream == SNDRV_PCM_STREAM_CAPTURE && !plugged) + snd_pcm_stop(hcp->substream, SNDRV_PCM_STATE_DISCONNECTED); + + dev_dbg(dev, "stream[%d]: %s\n", stream, plugged ? "plug in" : "plug out"); + } mutex_unlock(&hcp->lock); } From fcd99057b0e63e669342cd6c62b25fbbb851d53e Mon Sep 17 00:00:00 2001 From: Sugar Zhang Date: Tue, 14 Mar 2023 15:45:11 +0800 Subject: [PATCH 10/79] arm64: dts: rockchip: rk3588-*: Enable HDMI Jack This patch uses the rockchip-hdmi machine driver instead of simple-card to support jack detection. It also replaces ',' with '-' in the card name to support pulseaudio. 
Signed-off-by: Sugar Zhang Change-Id: I0e085225e89847671e958d0985d678240fdd6295 --- arch/arm64/boot/dts/rockchip/rk3588-nvr.dtsi | 34 +++++++------------ arch/arm64/boot/dts/rockchip/rk3588s-evb.dtsi | 17 ++++------ 2 files changed, 18 insertions(+), 33 deletions(-) diff --git a/arch/arm64/boot/dts/rockchip/rk3588-nvr.dtsi b/arch/arm64/boot/dts/rockchip/rk3588-nvr.dtsi index c39c3252ee4b..d9f69e07dd70 100644 --- a/arch/arm64/boot/dts/rockchip/rk3588-nvr.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3588-nvr.dtsi @@ -50,32 +50,22 @@ hdmi0_sound: hdmi0-sound { status = "disabled"; - compatible = "simple-audio-card"; - simple-audio-card,format = "i2s"; - simple-audio-card,mclk-fs = <128>; - simple-audio-card,name = "rockchip,hdmi0"; - - simple-audio-card,cpu { - sound-dai = <&i2s5_8ch>; - }; - simple-audio-card,codec { - sound-dai = <&hdmi0>; - }; + compatible = "rockchip,hdmi"; + rockchip,mclk-fs = <128>; + rockchip,card-name = "rockchip-hdmi0"; + rockchip,cpu = <&i2s5_8ch>; + rockchip,codec = <&hdmi0>; + rockchip,jack-det; }; hdmi1_sound: hdmi1-sound { status = "disabled"; - compatible = "simple-audio-card"; - simple-audio-card,format = "i2s"; - simple-audio-card,mclk-fs = <128>; - simple-audio-card,name = "rockchip,hdmi1"; - - simple-audio-card,cpu { - sound-dai = <&i2s6_8ch>; - }; - simple-audio-card,codec { - sound-dai = <&hdmi1>; - }; + compatible = "rockchip,hdmi"; + rockchip,mclk-fs = <128>; + rockchip,card-name = "rockchip-hdmi1"; + rockchip,cpu = <&i2s6_8ch>; + rockchip,codec = <&hdmi1>; + rockchip,jack-det; }; test-power { diff --git a/arch/arm64/boot/dts/rockchip/rk3588s-evb.dtsi b/arch/arm64/boot/dts/rockchip/rk3588s-evb.dtsi index c8a1ded05051..6db4c5734836 100644 --- a/arch/arm64/boot/dts/rockchip/rk3588s-evb.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3588s-evb.dtsi @@ -96,17 +96,12 @@ hdmi0_sound: hdmi0-sound { status = "disabled"; - compatible = "simple-audio-card"; - simple-audio-card,format = "i2s"; - simple-audio-card,mclk-fs = <128>; - 
simple-audio-card,name = "rockchip,hdmi0"; - - simple-audio-card,cpu { - sound-dai = <&i2s5_8ch>; - }; - simple-audio-card,codec { - sound-dai = <&hdmi0>; - }; + compatible = "rockchip,hdmi"; + rockchip,mclk-fs = <128>; + rockchip,card-name = "rockchip-hdmi0"; + rockchip,cpu = <&i2s5_8ch>; + rockchip,codec = <&hdmi0>; + rockchip,jack-det; }; spdif_tx1_dc: spdif-tx1-dc { From b87d415824a11d1d3c574b0ad6b4c4846f1c5335 Mon Sep 17 00:00:00 2001 From: Sugar Zhang Date: Wed, 15 Mar 2023 10:18:35 +0800 Subject: [PATCH 11/79] ASoC: rockchip: i2s-tdm: Simplify clk reparent on TRCM mode This patch do reparent CLK_TX/RX to the same parent on TRCM mode in driver instead of assign parent from DT. Now, the assigned-parent from DT can be removed. -assigned-clocks = <&cru SCLK_I2S0_8CH_RX>; -assigned-clock-parents = <&cru SCLK_I2S0_8CH_TX_MUX>; Signed-off-by: Sugar Zhang Change-Id: I0abda09a1348d05306e4026f61f7daa2850ae323 --- sound/soc/rockchip/rockchip_i2s_tdm.c | 43 +++++++++++++++++++++++++++ 1 file changed, 43 insertions(+) diff --git a/sound/soc/rockchip/rockchip_i2s_tdm.c b/sound/soc/rockchip/rockchip_i2s_tdm.c index d15b777d5b70..f1043607ec1c 100644 --- a/sound/soc/rockchip/rockchip_i2s_tdm.c +++ b/sound/soc/rockchip/rockchip_i2s_tdm.c @@ -916,6 +916,41 @@ out: return ret; } +static int rockchip_i2s_tdm_mclk_reparent(struct rk_i2s_tdm_dev *i2s_tdm) +{ + struct clk *parent; + int ret = 0; + + /* reparent to the same clk on TRCM mode */ + switch (i2s_tdm->clk_trcm) { + case I2S_CKR_TRCM_TXONLY: + parent = clk_get_parent(i2s_tdm->mclk_tx); + /* + * API clk_has_parent is not available yet on GKI, so we + * use clk_set_parent directly and ignore the ret value. + * if the API has addressed on GKI, should remove it. 
+ */ +#ifdef CONFIG_NO_GKI + if (clk_has_parent(i2s_tdm->mclk_rx, parent)) + ret = clk_set_parent(i2s_tdm->mclk_rx, parent); +#else + clk_set_parent(i2s_tdm->mclk_rx, parent); +#endif + break; + case I2S_CKR_TRCM_RXONLY: + parent = clk_get_parent(i2s_tdm->mclk_rx); +#ifdef CONFIG_NO_GKI + if (clk_has_parent(i2s_tdm->mclk_tx, parent)) + ret = clk_set_parent(i2s_tdm->mclk_tx, parent); +#else + clk_set_parent(i2s_tdm->mclk_tx, parent); +#endif + break; + } + + return ret; +} + static int rockchip_i2s_tdm_set_mclk(struct rk_i2s_tdm_dev *i2s_tdm, struct snd_pcm_substream *substream, struct clk **mclk) @@ -941,6 +976,10 @@ static int rockchip_i2s_tdm_set_mclk(struct rk_i2s_tdm_dev *i2s_tdm, if (ret) goto err; + ret = rockchip_i2s_tdm_mclk_reparent(i2s_tdm); + if (ret) + goto err; + /* mclk_rx is also ok. */ *mclk = i2s_tdm->mclk_tx; } else { @@ -2224,6 +2263,10 @@ static int rockchip_i2s_tdm_probe(struct platform_device *pdev) clk_set_rate(i2s_tdm->mclk_rx, rate); clk_set_rate(i2s_tdm->mclk_tx, rate); + ret = rockchip_i2s_tdm_mclk_reparent(i2s_tdm); + if (ret) + goto err_pm_disable; + regmap_update_bits(i2s_tdm->regmap, I2S_CLKDIV, I2S_CLKDIV_RXM_MASK | I2S_CLKDIV_TXM_MASK, I2S_CLKDIV_RXM(div_bclk) | I2S_CLKDIV_TXM(div_bclk)); From adb1f241f3a579c4041f56833d4761c08552f38a Mon Sep 17 00:00:00 2001 From: Frank Wang Date: Wed, 15 Mar 2023 11:08:16 +0800 Subject: [PATCH 12/79] usb: typec: tcpm: fix getting pd_revision Move up getting pd_revision in case it would be skipped while power-role is configured as source. 
Fixes: 765b15bf7940 ("usb: typec: tcpm: amend pd negotiated revision") Signed-off-by: Frank Wang Change-Id: I63d06d622cc00db6bacb301f6e61aba0a36cd854 --- drivers/usb/typec/tcpm/tcpm.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/usb/typec/tcpm/tcpm.c b/drivers/usb/typec/tcpm/tcpm.c index 788eb05a193c..a088c6e91e37 100644 --- a/drivers/usb/typec/tcpm/tcpm.c +++ b/drivers/usb/typec/tcpm/tcpm.c @@ -6059,6 +6059,13 @@ static int tcpm_fw_get_caps(struct tcpm_port *port, if (!fwnode) return -EINVAL; + ret = fwnode_property_read_u32(fwnode, "pd-revision", + &pd_revision); + if (ret < 0) + port->typec_caps.pd_revision = 0x0300; + else + port->typec_caps.pd_revision = pd_revision & 0xffff; + /* USB data support is optional */ ret = fwnode_property_read_string(fwnode, "data-role", &cap_str); if (ret == 0) { @@ -6162,13 +6169,6 @@ sink: return ret; } - ret = fwnode_property_read_u32(fwnode, "pd-revision", - &pd_revision); - if (ret < 0) - port->typec_caps.pd_revision = 0x0300; - else - port->typec_caps.pd_revision = pd_revision & 0xffff; - return 0; } From 775633a1edaecd5a01954108a21c9103641cdc9a Mon Sep 17 00:00:00 2001 From: Xing Zheng Date: Wed, 15 Mar 2023 21:49:16 +0800 Subject: [PATCH 13/79] ASoC: inno_rk3036: fixes and clean up some handles We haven't used the rk3036 codec driver on upstream for a long time, it seems like it needs some changes to work well with our EVB. 
- Clean up the process of DAC working/closing - Since there is no mixer on the path, remove the mixer widgets to make simple the driver path - Only keep one control node "Headphone Switch" to simplify user operation - Add the control gpio of power amplifier Signed-off-by: Xing Zheng Change-Id: Ia4b41b0148fe9680f58abe4113d1276f260ef1b5 --- sound/soc/codecs/inno_rk3036.c | 243 +++++++++++++++++---------------- 1 file changed, 124 insertions(+), 119 deletions(-) diff --git a/sound/soc/codecs/inno_rk3036.c b/sound/soc/codecs/inno_rk3036.c index d0e8f0d2fbc1..5694f4c81851 100644 --- a/sound/soc/codecs/inno_rk3036.c +++ b/sound/soc/codecs/inno_rk3036.c @@ -15,6 +15,7 @@ #include #include +#include #include #include #include @@ -29,68 +30,40 @@ struct rk3036_codec_priv { struct clk *pclk; struct regmap *regmap; struct device *dev; + struct gpio_desc *pa_ctl; }; static const DECLARE_TLV_DB_MINMAX(rk3036_codec_hp_tlv, -39, 0); -static int rk3036_codec_antipop_info(struct snd_kcontrol *kcontrol, - struct snd_ctl_elem_info *uinfo) +static int rk3036_codec_antipop_event(struct snd_soc_dapm_widget *w, + struct snd_kcontrol *kcontrol, int event) { - uinfo->type = SNDRV_CTL_ELEM_TYPE_BOOLEAN; - uinfo->count = 2; - uinfo->value.integer.min = 0; - uinfo->value.integer.max = 1; - - return 0; -} - -static int rk3036_codec_antipop_get(struct snd_kcontrol *kcontrol, - struct snd_ctl_elem_value *ucontrol) -{ - struct snd_soc_component *component = snd_kcontrol_chip(kcontrol); - int val, regval; - - regval = snd_soc_component_read(component, INNO_R09); - val = ((regval >> INNO_R09_HPL_ANITPOP_SHIFT) & - INNO_R09_HP_ANTIPOP_MSK) == INNO_R09_HP_ANTIPOP_ON; - ucontrol->value.integer.value[0] = val; - - val = ((regval >> INNO_R09_HPR_ANITPOP_SHIFT) & - INNO_R09_HP_ANTIPOP_MSK) == INNO_R09_HP_ANTIPOP_ON; - ucontrol->value.integer.value[1] = val; - - return 0; -} - -static int rk3036_codec_antipop_put(struct snd_kcontrol *kcontrol, - struct snd_ctl_elem_value *ucontrol) -{ - struct 
snd_soc_component *component = snd_kcontrol_chip(kcontrol); + struct snd_soc_component *component = snd_soc_dapm_to_component(w->dapm); int val, ret, regmsk; - val = (ucontrol->value.integer.value[0] ? - INNO_R09_HP_ANTIPOP_ON : INNO_R09_HP_ANTIPOP_OFF) << - INNO_R09_HPL_ANITPOP_SHIFT; - val |= (ucontrol->value.integer.value[1] ? - INNO_R09_HP_ANTIPOP_ON : INNO_R09_HP_ANTIPOP_OFF) << - INNO_R09_HPR_ANITPOP_SHIFT; - - regmsk = INNO_R09_HP_ANTIPOP_MSK << INNO_R09_HPL_ANITPOP_SHIFT | - INNO_R09_HP_ANTIPOP_MSK << INNO_R09_HPR_ANITPOP_SHIFT; + regmsk = INNO_R09_HP_ANTIPOP_MSK << w->shift; + switch (event) { + case SND_SOC_DAPM_PRE_PMU: + val = INNO_R09_HP_ANTIPOP_ON << w->shift; + break; + case SND_SOC_DAPM_POST_PMD: + val = INNO_R09_HP_ANTIPOP_OFF << w->shift; + break; + default: + return 0; + } ret = snd_soc_component_update_bits(component, INNO_R09, regmsk, val); if (ret < 0) return ret; + /* Need to wait POP Sound VCM is stable */ + msleep(50); + return 0; } -#define SOC_RK3036_CODEC_ANTIPOP_DECL(xname) \ -{ .iface = SNDRV_CTL_ELEM_IFACE_MIXER, .name = xname, \ - .info = rk3036_codec_antipop_info, .get = rk3036_codec_antipop_get, \ - .put = rk3036_codec_antipop_put, } - static const struct snd_kcontrol_new rk3036_codec_dapm_controls[] = { SOC_DOUBLE_R_RANGE_TLV("Headphone Volume", INNO_R07, INNO_R08, INNO_HP_GAIN_SHIFT, INNO_HP_GAIN_N39DB, @@ -99,68 +72,64 @@ static const struct snd_kcontrol_new rk3036_codec_dapm_controls[] = { INNO_R06_VOUTR_CZ_SHIFT, 1, 0), SOC_DOUBLE("Headphone Switch", INNO_R09, INNO_R09_HPL_MUTE_SHIFT, INNO_R09_HPR_MUTE_SHIFT, 1, 0), - SOC_RK3036_CODEC_ANTIPOP_DECL("Anti-pop Switch"), -}; - -static const struct snd_kcontrol_new rk3036_codec_hpl_mixer_controls[] = { - SOC_DAPM_SINGLE("DAC Left Out Switch", INNO_R09, - INNO_R09_DACL_SWITCH_SHIFT, 1, 0), -}; - -static const struct snd_kcontrol_new rk3036_codec_hpr_mixer_controls[] = { - SOC_DAPM_SINGLE("DAC Right Out Switch", INNO_R09, - INNO_R09_DACR_SWITCH_SHIFT, 1, 0), -}; - -static const 
struct snd_kcontrol_new rk3036_codec_hpl_switch_controls[] = { - SOC_DAPM_SINGLE("HP Left Out Switch", INNO_R05, - INNO_R05_HPL_WORK_SHIFT, 1, 0), -}; - -static const struct snd_kcontrol_new rk3036_codec_hpr_switch_controls[] = { - SOC_DAPM_SINGLE("HP Right Out Switch", INNO_R05, - INNO_R05_HPR_WORK_SHIFT, 1, 0), }; static const struct snd_soc_dapm_widget rk3036_codec_dapm_widgets[] = { - SND_SOC_DAPM_SUPPLY_S("DAC PWR", 1, INNO_R06, - INNO_R06_DAC_EN_SHIFT, 0, NULL, 0), - SND_SOC_DAPM_SUPPLY_S("DACL VREF", 2, INNO_R04, - INNO_R04_DACL_VREF_SHIFT, 0, NULL, 0), - SND_SOC_DAPM_SUPPLY_S("DACR VREF", 2, INNO_R04, - INNO_R04_DACR_VREF_SHIFT, 0, NULL, 0), - SND_SOC_DAPM_SUPPLY_S("DACL HiLo VREF", 3, INNO_R06, - INNO_R06_DACL_HILO_VREF_SHIFT, 0, NULL, 0), - SND_SOC_DAPM_SUPPLY_S("DACR HiLo VREF", 3, INNO_R06, - INNO_R06_DACR_HILO_VREF_SHIFT, 0, NULL, 0), - SND_SOC_DAPM_SUPPLY_S("DACR CLK", 3, INNO_R04, - INNO_R04_DACR_CLK_SHIFT, 0, NULL, 0), - SND_SOC_DAPM_SUPPLY_S("DACL CLK", 3, INNO_R04, - INNO_R04_DACL_CLK_SHIFT, 0, NULL, 0), + /* Using S3(Step3) as the starting step by datasheet */ + SND_SOC_DAPM_SUPPLY_S("DAC PWR", 0, INNO_R06, + INNO_R06_DAC_EN_SHIFT, 0, NULL, + SND_SOC_DAPM_PRE_PMU | SND_SOC_DAPM_POST_PMD), + SND_SOC_DAPM_SUPPLY_S("DACL VREF", 1, INNO_R04, + INNO_R04_DACL_VREF_SHIFT, 0, NULL, + SND_SOC_DAPM_PRE_PMU | SND_SOC_DAPM_POST_PMD), + SND_SOC_DAPM_SUPPLY_S("DACR VREF", 1, INNO_R04, + INNO_R04_DACR_VREF_SHIFT, 0, NULL, + SND_SOC_DAPM_PRE_PMU | SND_SOC_DAPM_POST_PMD), + SND_SOC_DAPM_SUPPLY_S("DACL ANTI-POP", 2, SND_SOC_NOPM, + INNO_R09_HPL_ANITPOP_SHIFT, 0, rk3036_codec_antipop_event, + SND_SOC_DAPM_PRE_PMU | SND_SOC_DAPM_POST_PMD), + SND_SOC_DAPM_SUPPLY_S("DACR ANTI-POP", 2, SND_SOC_NOPM, + INNO_R09_HPR_ANITPOP_SHIFT, 0, rk3036_codec_antipop_event, + SND_SOC_DAPM_PRE_PMU | SND_SOC_DAPM_POST_PMD), + SND_SOC_DAPM_SUPPLY_S("HPL OUT EN", 3, INNO_R05, + INNO_R05_HPL_EN_SHIFT, 0, NULL, + SND_SOC_DAPM_PRE_PMU | SND_SOC_DAPM_POST_PMD), + SND_SOC_DAPM_SUPPLY_S("HPR 
OUT EN", 3, INNO_R05, + INNO_R05_HPR_EN_SHIFT, 0, NULL, + SND_SOC_DAPM_PRE_PMU | SND_SOC_DAPM_POST_PMD), + SND_SOC_DAPM_SUPPLY_S("HPL OUT WORK", 4, INNO_R05, + INNO_R05_HPL_WORK_SHIFT, 0, NULL, + SND_SOC_DAPM_PRE_PMU | SND_SOC_DAPM_POST_PMD), + SND_SOC_DAPM_SUPPLY_S("HPR OUT WORK", 4, INNO_R05, + INNO_R05_HPR_WORK_SHIFT, 0, NULL, + SND_SOC_DAPM_PRE_PMU | SND_SOC_DAPM_POST_PMD), + SND_SOC_DAPM_SUPPLY_S("DACL HiLo VREF", 5, INNO_R06, + INNO_R06_DACL_HILO_VREF_SHIFT, 0, NULL, + SND_SOC_DAPM_PRE_PMU | SND_SOC_DAPM_POST_PMD), + SND_SOC_DAPM_SUPPLY_S("DACR HiLo VREF", 5, INNO_R06, + INNO_R06_DACR_HILO_VREF_SHIFT, 0, NULL, + SND_SOC_DAPM_PRE_PMU | SND_SOC_DAPM_POST_PMD), + SND_SOC_DAPM_SUPPLY_S("DACL CLK", 6, INNO_R04, + INNO_R04_DACL_CLK_SHIFT, 0, NULL, + SND_SOC_DAPM_PRE_PMU | SND_SOC_DAPM_POST_PMD), + SND_SOC_DAPM_SUPPLY_S("DACR CLK", 6, INNO_R04, + INNO_R04_DACR_CLK_SHIFT, 0, NULL, + SND_SOC_DAPM_PRE_PMU | SND_SOC_DAPM_POST_PMD), + SND_SOC_DAPM_SUPPLY_S("DACL WORK", 7, INNO_R04, + INNO_R04_DACL_SW_SHIFT, 0, NULL, + SND_SOC_DAPM_PRE_PMU | SND_SOC_DAPM_POST_PMD), + SND_SOC_DAPM_SUPPLY_S("DACR WORK", 7, INNO_R04, + INNO_R04_DACR_SW_SHIFT, 0, NULL, + SND_SOC_DAPM_PRE_PMU | SND_SOC_DAPM_POST_PMD), - SND_SOC_DAPM_DAC("DACL", "Left Playback", INNO_R04, - INNO_R04_DACL_SW_SHIFT, 0), - SND_SOC_DAPM_DAC("DACR", "Right Playback", INNO_R04, - INNO_R04_DACR_SW_SHIFT, 0), + SND_SOC_DAPM_DAC_E("DACL", "Left Playback", INNO_R09, + INNO_R09_DACL_SWITCH_SHIFT, 0, NULL, + SND_SOC_DAPM_PRE_PMU | SND_SOC_DAPM_POST_PMD), + SND_SOC_DAPM_DAC_E("DACR", "Right Playback", INNO_R09, + INNO_R09_DACR_SWITCH_SHIFT, 0, NULL, + SND_SOC_DAPM_PRE_PMU | SND_SOC_DAPM_POST_PMD), - SND_SOC_DAPM_MIXER("Left Headphone Mixer", SND_SOC_NOPM, 0, 0, - rk3036_codec_hpl_mixer_controls, - ARRAY_SIZE(rk3036_codec_hpl_mixer_controls)), - SND_SOC_DAPM_MIXER("Right Headphone Mixer", SND_SOC_NOPM, 0, 0, - rk3036_codec_hpr_mixer_controls, - ARRAY_SIZE(rk3036_codec_hpr_mixer_controls)), - - SND_SOC_DAPM_PGA("HP Left Out", 
INNO_R05, - INNO_R05_HPL_EN_SHIFT, 0, NULL, 0), - SND_SOC_DAPM_PGA("HP Right Out", INNO_R05, - INNO_R05_HPR_EN_SHIFT, 0, NULL, 0), - - SND_SOC_DAPM_MIXER("HP Left Switch", SND_SOC_NOPM, 0, 0, - rk3036_codec_hpl_switch_controls, - ARRAY_SIZE(rk3036_codec_hpl_switch_controls)), - SND_SOC_DAPM_MIXER("HP Right Switch", SND_SOC_NOPM, 0, 0, - rk3036_codec_hpr_switch_controls, - ARRAY_SIZE(rk3036_codec_hpr_switch_controls)), + SND_SOC_DAPM_AIF_IN("DAI-IN", "Playback", 0, SND_SOC_NOPM, 0, 0), SND_SOC_DAPM_OUTPUT("HPL"), SND_SOC_DAPM_OUTPUT("HPR"), @@ -169,28 +138,39 @@ static const struct snd_soc_dapm_widget rk3036_codec_dapm_widgets[] = { static const struct snd_soc_dapm_route rk3036_codec_dapm_routes[] = { {"DACL VREF", NULL, "DAC PWR"}, {"DACR VREF", NULL, "DAC PWR"}, + {"DACL ANTI-POP", NULL, "DAC PWR"}, + {"DACR ANTI-POP", NULL, "DAC PWR"}, + {"HPL OUT EN", NULL, "DAC PWR"}, + {"HPR OUT EN", NULL, "DAC PWR"}, + {"HPL OUT WORK", NULL, "DAC PWR"}, + {"HPR OUT WORK", NULL, "DAC PWR"}, {"DACL HiLo VREF", NULL, "DAC PWR"}, {"DACR HiLo VREF", NULL, "DAC PWR"}, {"DACL CLK", NULL, "DAC PWR"}, {"DACR CLK", NULL, "DAC PWR"}, + {"DACL WORK", NULL, "DAC PWR"}, + {"DACR WORK", NULL, "DAC PWR"}, {"DACL", NULL, "DACL VREF"}, + {"DACL", NULL, "DACL ANTI-POP"}, + {"DACL", NULL, "HPL OUT EN"}, + {"DACL", NULL, "HPL OUT WORK"}, {"DACL", NULL, "DACL HiLo VREF"}, {"DACL", NULL, "DACL CLK"}, + {"DACL", NULL, "DACL WORK"}, {"DACR", NULL, "DACR VREF"}, + {"DACR", NULL, "DACR ANTI-POP"}, + {"DACR", NULL, "HPR OUT EN"}, + {"DACR", NULL, "HPR OUT WORK"}, {"DACR", NULL, "DACR HiLo VREF"}, {"DACR", NULL, "DACR CLK"}, + {"DACR", NULL, "DACR WORK"}, - {"Left Headphone Mixer", "DAC Left Out Switch", "DACL"}, - {"Right Headphone Mixer", "DAC Right Out Switch", "DACR"}, - {"HP Left Out", NULL, "Left Headphone Mixer"}, - {"HP Right Out", NULL, "Right Headphone Mixer"}, + {"DACL", NULL, "DAI-IN"}, + {"DACR", NULL, "DAI-IN"}, - {"HP Left Switch", "HP Left Out Switch", "HP Left Out"}, - {"HP Right 
Switch", "HP Right Out Switch", "HP Right Out"}, - - {"HPL", NULL, "HP Left Switch"}, - {"HPR", NULL, "HP Right Switch"}, + {"HPL", NULL, "DACL"}, + {"HPR", NULL, "DACR"}, }; static int rk3036_codec_dai_set_fmt(struct snd_soc_dai *dai, unsigned int fmt) @@ -297,6 +277,20 @@ static int rk3036_codec_dai_hw_params(struct snd_pcm_substream *substream, return 0; } +static int rk3308_mute_stream(struct snd_soc_dai *dai, int mute, int stream) +{ + struct snd_soc_component *component = dai->component; + struct rk3036_codec_priv *priv = snd_soc_component_get_drvdata(component); + + if (stream == SNDRV_PCM_STREAM_CAPTURE) + return 0; + + if (priv->pa_ctl) + gpiod_direction_output(priv->pa_ctl, !mute); + + return 0; +} + #define RK3036_CODEC_RATES (SNDRV_PCM_RATE_8000 | \ SNDRV_PCM_RATE_16000 | \ SNDRV_PCM_RATE_32000 | \ @@ -312,6 +306,7 @@ static int rk3036_codec_dai_hw_params(struct snd_pcm_substream *substream, static const struct snd_soc_dai_ops rk3036_codec_dai_ops = { .set_fmt = rk3036_codec_dai_set_fmt, .hw_params = rk3036_codec_dai_hw_params, + .mute_stream = rk3308_mute_stream, }; static struct snd_soc_dai_driver rk3036_codec_dai_driver[] = { @@ -352,17 +347,18 @@ static int rk3036_codec_set_bias_level(struct snd_soc_component *component, enum snd_soc_bias_level level) { switch (level) { - case SND_SOC_BIAS_STANDBY: - /* set a big current for capacitor charging. */ - snd_soc_component_write(component, INNO_R10, INNO_R10_MAX_CUR); - /* start precharge */ + case SND_SOC_BIAS_PREPARE: + /* start precharge and waiting finish. */ snd_soc_component_write(component, INNO_R06, INNO_R06_DAC_PRECHARGE); + msleep(20); break; - case SND_SOC_BIAS_OFF: - /* set a big current for capacitor discharging. */ - snd_soc_component_write(component, INNO_R10, INNO_R10_MAX_CUR); + case SND_SOC_BIAS_STANDBY: + if (snd_soc_component_get_bias_level(component) == SND_SOC_BIAS_OFF) { + /* set a big current for capacitor charging. 
*/ + snd_soc_component_write(component, INNO_R10, INNO_R10_MAX_CUR); + } /* start discharge. */ snd_soc_component_write(component, INNO_R06, INNO_R06_DAC_DISCHARGE); @@ -434,6 +430,15 @@ static int rk3036_codec_platform_probe(struct platform_device *pdev) return ret; } + priv->pa_ctl = devm_gpiod_get_optional(&pdev->dev, "pa-ctl", + GPIOD_OUT_LOW); + if (!priv->pa_ctl) { + dev_info(&pdev->dev, "Don't need pa-ctl gpio\n"); + } else if (IS_ERR(priv->pa_ctl)) { + dev_err(&pdev->dev, "Unable to claim gpio pa-ctl\n"); + return PTR_ERR(priv->pa_ctl); + } + priv->pclk = devm_clk_get(&pdev->dev, "acodec_pclk"); if (IS_ERR(priv->pclk)) return PTR_ERR(priv->pclk); From 27e305c61103ce1579be5ca5bf989ad834087674 Mon Sep 17 00:00:00 2001 From: Xing Zheng Date: Wed, 15 Mar 2023 23:04:30 +0800 Subject: [PATCH 14/79] ARM: dts: rockchip: rk3036-evb1: fix the 'pa-ctl-gpios' name Signed-off-by: Xing Zheng Change-Id: I01e667086ac41d055ff1c2967f32d871f8e08cea --- arch/arm/boot/dts/rk3036-evb1-ddr3-v10.dts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/boot/dts/rk3036-evb1-ddr3-v10.dts b/arch/arm/boot/dts/rk3036-evb1-ddr3-v10.dts index 756f94a12b38..85a1c67e79fc 100644 --- a/arch/arm/boot/dts/rk3036-evb1-ddr3-v10.dts +++ b/arch/arm/boot/dts/rk3036-evb1-ddr3-v10.dts @@ -121,7 +121,7 @@ }; &acodec { - spk_ctl_io = <&gpio2 RK_PD6 GPIO_ACTIVE_HIGH>; + pa-ctl-gpios = <&gpio2 RK_PD6 GPIO_ACTIVE_HIGH>; #sound-dai-cells = <0>; status = "okay"; }; From 1765ef7906a19eb403adf58f77dbec3217aca686 Mon Sep 17 00:00:00 2001 From: Yandong Lin Date: Wed, 8 Feb 2023 17:27:37 +0800 Subject: [PATCH 15/79] video: rockchip: mpp: fix get hw time error issue Use aclk to calculate hw time. 
Signed-off-by: Yandong Lin Change-Id: Ic2d218531ae03583e51f6b8016832ca9d2ff3c2d --- drivers/video/rockchip/mpp/mpp_rkvdec2.c | 3 ++- drivers/video/rockchip/mpp/mpp_rkvdec2.h | 2 ++ drivers/video/rockchip/mpp/mpp_rkvdec2_link.c | 12 ++++++++++-- 3 files changed, 14 insertions(+), 3 deletions(-) diff --git a/drivers/video/rockchip/mpp/mpp_rkvdec2.c b/drivers/video/rockchip/mpp/mpp_rkvdec2.c index d30f157a23ef..780e2a364030 100644 --- a/drivers/video/rockchip/mpp/mpp_rkvdec2.c +++ b/drivers/video/rockchip/mpp/mpp_rkvdec2.c @@ -422,7 +422,7 @@ static int rkvdec2_isr(struct mpp_dev *mpp) return IRQ_HANDLED; } mpp_task->hw_cycles = mpp_read(mpp, RKVDEC_PERF_WORKING_CNT); - mpp_time_diff_with_hw_time(mpp_task, dec->core_clk_info.real_rate_hz); + mpp_time_diff_with_hw_time(mpp_task, dec->cycle_clk->real_rate_hz); mpp->cur_task = NULL; task = to_rkvdec2_task(mpp_task); task->irq_status = mpp->irq_status; @@ -1005,6 +1005,7 @@ static int rkvdec2_init(struct mpp_dev *mpp) mpp_set_clk_info_rate_hz(&dec->cabac_clk_info, CLK_MODE_DEFAULT, 200 * MHZ); mpp_set_clk_info_rate_hz(&dec->hevc_cabac_clk_info, CLK_MODE_DEFAULT, 300 * MHZ); + dec->cycle_clk = &dec->aclk_info; /* Get normal max workload from dtsi */ of_property_read_u32(mpp->dev->of_node, "rockchip,default-max-load", &dec->default_max_load); diff --git a/drivers/video/rockchip/mpp/mpp_rkvdec2.h b/drivers/video/rockchip/mpp/mpp_rkvdec2.h index 58eb8f7869e9..be89535e6217 100644 --- a/drivers/video/rockchip/mpp/mpp_rkvdec2.h +++ b/drivers/video/rockchip/mpp/mpp_rkvdec2.h @@ -178,6 +178,8 @@ struct rkvdec2_dev { struct mpp_clk_info core_clk_info; struct mpp_clk_info cabac_clk_info; struct mpp_clk_info hevc_cabac_clk_info; + struct mpp_clk_info *cycle_clk; + u32 default_max_load; #ifdef CONFIG_ROCKCHIP_MPP_PROC_FS struct proc_dir_entry *procfs; diff --git a/drivers/video/rockchip/mpp/mpp_rkvdec2_link.c b/drivers/video/rockchip/mpp/mpp_rkvdec2_link.c index 58bba121bdc7..41a4d9089aeb 100644 --- 
a/drivers/video/rockchip/mpp/mpp_rkvdec2_link.c +++ b/drivers/video/rockchip/mpp/mpp_rkvdec2_link.c @@ -190,6 +190,7 @@ struct rkvdec_link_info rkvdec_link_vdpu382_hw_info = { }, .tb_reg_int = 180, .hack_setup = 0, + .tb_reg_cycle = 197, .reg_status = { .dec_num_mask = 0x000fffff, .err_flag_base = 0x024, @@ -674,7 +675,7 @@ static int rkvdec_link_isr_recv_task(struct mpp_dev *mpp, regs = table_base + idx * link_dec->link_reg_count; irq_status = regs[info->tb_reg_int]; mpp_task->hw_cycles = regs[info->tb_reg_cycle]; - mpp_time_diff_with_hw_time(mpp_task, dec->core_clk_info.real_rate_hz); + mpp_time_diff_with_hw_time(mpp_task, dec->cycle_clk->real_rate_hz); mpp_dbg_link_flow("slot %d rd task %d\n", idx, mpp_task->task_id); @@ -1684,12 +1685,19 @@ static int rkvdec2_ccu_power_on(struct mpp_taskqueue *queue, mpp_clk_safe_enable(ccu->aclk_info.clk); /* core pd and clk on */ for (i = 0; i < queue->core_count; i++) { + struct rkvdec2_dev *dec; + mpp = queue->cores[i]; + dec = to_rkvdec2_dev(mpp); pm_runtime_get_sync(mpp->dev); pm_stay_awake(mpp->dev); if (mpp->hw_ops->clk_on) mpp->hw_ops->clk_on(mpp); + mpp_clk_set_rate(&dec->aclk_info, CLK_MODE_NORMAL); + mpp_clk_set_rate(&dec->cabac_clk_info, CLK_MODE_NORMAL); + mpp_clk_set_rate(&dec->hevc_cabac_clk_info, CLK_MODE_NORMAL); + mpp_devfreq_set_core_rate(mpp, CLK_MODE_NORMAL); mpp_iommu_dev_activate(mpp->iommu_info, mpp); } mpp_debug(DEBUG_CCU, "power on\n"); @@ -1760,7 +1768,7 @@ static int rkvdec2_soft_ccu_dequeue(struct mpp_taskqueue *queue) set_bit(TASK_STATE_HANDLE, &mpp_task->state); cancel_delayed_work(&mpp_task->timeout_work); mpp_task->hw_cycles = mpp_read(mpp, RKVDEC_PERF_WORKING_CNT); - mpp_time_diff_with_hw_time(mpp_task, dec->core_clk_info.real_rate_hz); + mpp_time_diff_with_hw_time(mpp_task, dec->cycle_clk->real_rate_hz); task->irq_status = irq_status; mpp_debug(DEBUG_IRQ_CHECK, "irq_status=%08x, timeout=%u, abort=%u\n", irq_status, timeout_flag, abort_flag); From 7e17d0539ae0e1202889f1b5e7910182c2184784 Mon 
Sep 17 00:00:00 2001 From: Sugar Zhang Date: Wed, 4 Jan 2023 17:23:36 +0800 Subject: [PATCH 16/79] arm64: dts: rockchip: rk3528-evb: Enable HDMI Jack This patch enables HDMI event notification. Change-Id: Id2e585780e4c372b8de317bd499b0ab2b4788a1d Signed-off-by: Sugar Zhang --- arch/arm64/boot/dts/rockchip/rk3528-evb.dtsi | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm64/boot/dts/rockchip/rk3528-evb.dtsi b/arch/arm64/boot/dts/rockchip/rk3528-evb.dtsi index c003776f46fb..55d88dc6fc27 100644 --- a/arch/arm64/boot/dts/rockchip/rk3528-evb.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3528-evb.dtsi @@ -69,6 +69,7 @@ rockchip,card-name = "rockchip,hdmi"; rockchip,cpu = <&sai3>; rockchip,codec = <&hdmi>; + rockchip,jack-det; }; pdmics: dummy-codec { From c9e65e311aaf5bc7b10b5ff21e3b04b27c8b080f Mon Sep 17 00:00:00 2001 From: Damon Ding Date: Thu, 16 Mar 2023 14:47:59 +0800 Subject: [PATCH 17/79] arm64: configs: rockchip_linux_defconfig: enable tve Signed-off-by: Damon Ding Change-Id: I024f9645385398377df526b9ea6af64280f36eac --- arch/arm64/configs/rockchip_linux_defconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm64/configs/rockchip_linux_defconfig b/arch/arm64/configs/rockchip_linux_defconfig index 81a87c158bfb..37e69e7e8ae4 100644 --- a/arch/arm64/configs/rockchip_linux_defconfig +++ b/arch/arm64/configs/rockchip_linux_defconfig @@ -331,6 +331,7 @@ CONFIG_DRM_LOAD_EDID_FIRMWARE=y CONFIG_DRM_ROCKCHIP=y CONFIG_ROCKCHIP_ANALOGIX_DP=y CONFIG_ROCKCHIP_CDN_DP=y +CONFIG_ROCKCHIP_DRM_TVE=y CONFIG_ROCKCHIP_DW_HDMI=y CONFIG_ROCKCHIP_DW_MIPI_DSI=y CONFIG_ROCKCHIP_DW_DP=y From 56534c20ec1e3aaf9d8bca1a06e5bb173b12a991 Mon Sep 17 00:00:00 2001 From: Cai YiWei Date: Thu, 16 Mar 2023 16:12:32 +0800 Subject: [PATCH 18/79] arm64: dts: rockchip: rk3566 and rk3568 update iq feature Change-Id: I157391733ccd1bdea84ad3e1b270f925b61bb63a Signed-off-by: Cai YiWei --- arch/arm64/boot/dts/rockchip/rk3566.dtsi | 2 +- arch/arm64/boot/dts/rockchip/rk3568.dtsi | 2 +- 2 files changed, 2
insertions(+), 2 deletions(-) diff --git a/arch/arm64/boot/dts/rockchip/rk3566.dtsi b/arch/arm64/boot/dts/rockchip/rk3566.dtsi index 60c299ee7bf3..066f13843d11 100644 --- a/arch/arm64/boot/dts/rockchip/rk3566.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3566.dtsi @@ -38,7 +38,7 @@ }; &rkisp { - rockchip,iq-feature = /bits/ 64 <0x3FBF7FE67FF>; + rockchip,iq-feature = /bits/ 64 <0x1BFBF7FE67FF>; }; &usbdrd_dwc3 { diff --git a/arch/arm64/boot/dts/rockchip/rk3568.dtsi b/arch/arm64/boot/dts/rockchip/rk3568.dtsi index 2f9174d1fad9..3e48ce788e8e 100644 --- a/arch/arm64/boot/dts/rockchip/rk3568.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3568.dtsi @@ -1736,7 +1736,7 @@ rockchip,grf = <&grf>; power-domains = <&power RK3568_PD_VI>; iommus = <&rkisp_mmu>; - rockchip,iq-feature = /bits/ 64 <0x3FBFFFE67FF>; + rockchip,iq-feature = /bits/ 64 <0x1BFBFFFE67FF>; status = "disabled"; }; From c8eb54c56b24f93acbb43a7db521c0a2c754c3b2 Mon Sep 17 00:00:00 2001 From: Cai YiWei Date: Thu, 16 Mar 2023 11:27:33 +0800 Subject: [PATCH 19/79] media: rockchip: isp: fix 3a wr with two readback case Change-Id: Ib6e55e08dc9634706b2114c6f4d64be15a97ff52 Signed-off-by: Cai YiWei --- drivers/media/platform/rockchip/isp/rkisp.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/media/platform/rockchip/isp/rkisp.c b/drivers/media/platform/rockchip/isp/rkisp.c index 7d6a02ed6e16..0039b204070c 100644 --- a/drivers/media/platform/rockchip/isp/rkisp.c +++ b/drivers/media/platform/rockchip/isp/rkisp.c @@ -558,8 +558,9 @@ static void rkisp_multi_overflow_hdl(struct rkisp_device *dev, bool on) rkisp_update_regs(dev, ISP3X_BAY3D_CTRL, ISP3X_BAY3D_CTRL); } } else { - /* disabled bay3d and mi */ - writel(0, hw->base_addr + ISP3X_MI_WR_CTRL); + /* disabled bay3d and mi. 
rv1106 sdmmc workaround, 3a_wr no close */ + writel(CIF_MI_CTRL_INIT_OFFSET_EN | CIF_MI_CTRL_INIT_BASE_EN, + hw->base_addr + ISP3X_MI_WR_CTRL); if (dev->isp_ver == ISP_V21) { writel(0, hw->base_addr + ISP21_BAY3D_CTRL); } else if (dev->isp_ver == ISP_V30) { From d0edc7b3e70a4e1a2e76734bb79dbbf4fc204e0a Mon Sep 17 00:00:00 2001 From: Cai YiWei Date: Mon, 6 Mar 2023 15:45:37 +0800 Subject: [PATCH 20/79] media: rockchip: isp: thunder boot with multi sensor Change-Id: I20efdaf70a24e9b892b40bed6420b2988b8125b4 Signed-off-by: Cai YiWei --- drivers/media/platform/rockchip/isp/dev.c | 8 +- drivers/media/platform/rockchip/isp/dmarx.c | 3 + .../media/platform/rockchip/isp/isp_stats.c | 3 + drivers/media/platform/rockchip/isp/rkisp.c | 149 +++++++++--------- include/uapi/linux/rkisp2-config.h | 3 +- include/uapi/linux/rkisp32-config.h | 2 +- 6 files changed, 83 insertions(+), 85 deletions(-) diff --git a/drivers/media/platform/rockchip/isp/dev.c b/drivers/media/platform/rockchip/isp/dev.c index a6abe6fc722e..9aecc186faaa 100644 --- a/drivers/media/platform/rockchip/isp/dev.c +++ b/drivers/media/platform/rockchip/isp/dev.c @@ -861,11 +861,9 @@ static int rkisp_plat_probe(struct platform_device *pdev) strscpy(isp_dev->media_dev.driver_name, isp_dev->name, sizeof(isp_dev->media_dev.driver_name)); - if (isp_dev->hw_dev->is_thunderboot) { - ret = rkisp_get_reserved_mem(isp_dev); - if (ret) - return ret; - } + ret = rkisp_get_reserved_mem(isp_dev); + if (ret) + return ret; mutex_init(&isp_dev->apilock); mutex_init(&isp_dev->iqlock); diff --git a/drivers/media/platform/rockchip/isp/dmarx.c b/drivers/media/platform/rockchip/isp/dmarx.c index e190fef6386a..b7af0af8dc3f 100644 --- a/drivers/media/platform/rockchip/isp/dmarx.c +++ b/drivers/media/platform/rockchip/isp/dmarx.c @@ -471,6 +471,9 @@ static int dmarx_frame_end(struct rkisp_stream *stream) true, dev->hw_dev->is_unite); rkisp_unite_clear_bits(dev, CIF_ISP_IMSC, CIF_ISP_FRAME_IN, true, dev->hw_dev->is_unite); + 
dev_info(dev->dev, + "switch online seq:%d mode:0x%x\n", + rx_buf->sequence, dev->rd_mode); } rx_buf->runtime_us = dev->isp_sdev.dbg.interval / 1000; v4l2_subdev_call(sd, video, s_rx_buffer, rx_buf, NULL); diff --git a/drivers/media/platform/rockchip/isp/isp_stats.c b/drivers/media/platform/rockchip/isp/isp_stats.c index 025ff95843d5..7d21e2cb4c01 100644 --- a/drivers/media/platform/rockchip/isp/isp_stats.c +++ b/drivers/media/platform/rockchip/isp/isp_stats.c @@ -158,6 +158,9 @@ static void rkisp_stats_vb2_buf_queue(struct vb2_buffer *vb) struct rkisp32_isp_stat_buffer *buf = stats_dev->stats_buf[0].vaddr; if (buf && !buf->frame_id && buf->meas_type && stats_buf->vaddr[0]) { + dev_info(stats_dev->dev->dev, + "tb stat seq:%d meas_type:0x%x\n", + buf->frame_id, buf->meas_type); memcpy(stats_buf->vaddr[0], buf, sizeof(struct rkisp32_isp_stat_buffer)); buf->meas_type = 0; vb2_set_plane_payload(vb, 0, sizeof(struct rkisp32_isp_stat_buffer)); diff --git a/drivers/media/platform/rockchip/isp/rkisp.c b/drivers/media/platform/rockchip/isp/rkisp.c index 0039b204070c..44173416fa60 100644 --- a/drivers/media/platform/rockchip/isp/rkisp.c +++ b/drivers/media/platform/rockchip/isp/rkisp.c @@ -722,7 +722,7 @@ void rkisp_trigger_read_back(struct rkisp_device *dev, u8 dma2frm, u32 mode, boo params_vdev->rdbk_times = dma2frm + 1; run_next: - if (hw->is_multi_overflow) { + if (hw->is_multi_overflow && !dev->is_first_double) { stats_vdev->rdbk_drop = false; if (dev->sw_rd_cnt) { rkisp_multi_overflow_hdl(dev, false); @@ -908,6 +908,7 @@ static void rkisp_rdbk_trigger_handle(struct rkisp_device *dev, u32 cmd) if (isp->is_pre_on && t.frame_id == 0) { isp->is_first_double = true; isp->skip_frame = 1; + isp->sw_rd_cnt = 0; rkisp_fast_switch_rx_buf(isp, false); } } @@ -969,7 +970,8 @@ void rkisp_check_idle(struct rkisp_device *dev, u32 irq) if (dev->hw_dev->is_multi_overflow && dev->sw_rd_cnt && - irq & ISP_FRAME_END) + irq & ISP_FRAME_END && + !dev->is_first_double) goto end; 
dev->irq_ends |= (irq & dev->irq_ends_mask); @@ -2961,10 +2963,9 @@ static int rkisp_rx_qbuf(struct rkisp_device *dev, } v4l2_dbg(2, rkisp_debug, &dev->v4l2_dev, - "%s rd_mode:%d dma:0x%x vaddr:%p", - __func__, dev->rd_mode, - pool->buf.buff_addr[RKISP_PLANE_Y], - pool->buf.vaddr[RKISP_PLANE_Y]); + "%s rd_mode:%d seq:%d dma:0x%x\n", + __func__, dev->rd_mode, dbufs->sequence, + pool->buf.buff_addr[RKISP_PLANE_Y]); if (!IS_HDR_RDBK(dev->rd_mode)) { rkisp_rx_qbuf_online(stream, pool); @@ -3768,94 +3769,86 @@ void rkisp_unregister_isp_subdev(struct rkisp_device *isp_dev) #ifdef CONFIG_VIDEO_ROCKCHIP_THUNDER_BOOT_ISP void rkisp_chk_tb_over(struct rkisp_device *isp_dev) { + struct rkisp_hw_dev *hw = isp_dev->hw_dev; struct rkisp_thunderboot_resmem_head *head; enum rkisp_tb_state tb_state; void *resmem_va; - if (!isp_dev->hw_dev->is_thunderboot) + if (!isp_dev->is_thunderboot) return; - if (!isp_dev->is_thunderboot) { - v4l2_info(&isp_dev->v4l2_dev, - "no reserved memory for thunderboot\n"); - if (isp_dev->hw_dev->is_thunderboot) { - rkisp_tb_set_state(RKISP_TB_NG); - rkisp_tb_unprotect_clk(); - rkisp_register_irq(isp_dev->hw_dev); - isp_dev->hw_dev->is_thunderboot = false; - } - return; - } - resmem_va = phys_to_virt(isp_dev->resmem_pa); head = (struct rkisp_thunderboot_resmem_head *)resmem_va; - if (isp_dev->is_thunderboot) { - dma_sync_single_for_cpu(isp_dev->dev, isp_dev->resmem_addr, - sizeof(struct rkisp_thunderboot_resmem_head), - DMA_FROM_DEVICE); - if (head->enable && !head->complete) { - /* notify rtt to stop */ - head->enable = 0; - dma_sync_single_for_device(isp_dev->dev, isp_dev->resmem_addr, - sizeof(struct rkisp_thunderboot_resmem_head), - DMA_TO_DEVICE); - } - shm_head_poll_timeout(isp_dev, !!head->complete, 5000, 400 * USEC_PER_MSEC); - if (head->complete != RKISP_TB_OK) { - v4l2_err(&isp_dev->v4l2_dev, "wait thunderboot over timeout\n"); - } else { - struct rkisp_isp_params_vdev *params_vdev = &isp_dev->params_vdev; - void *param = NULL; - u32 size = 0; 
+ dma_sync_single_for_cpu(isp_dev->dev, isp_dev->resmem_addr, + sizeof(struct rkisp_thunderboot_resmem_head), + DMA_FROM_DEVICE); - switch (isp_dev->hw_dev->isp_ver) { - case ISP_V32: - size = sizeof(struct rkisp32_thunderboot_resmem_head); - break; - default: - break; - } - if (size && size < isp_dev->resmem_size) { - dma_sync_single_for_cpu(isp_dev->dev, isp_dev->resmem_addr, - size, DMA_FROM_DEVICE); - params_vdev->is_first_cfg = true; - if (isp_dev->hw_dev->isp_ver == ISP_V32) { - struct rkisp32_thunderboot_resmem_head *tmp = resmem_va; - - param = &tmp->cfg; - } - if (param) - params_vdev->ops->save_first_param(params_vdev, param); - } else if (size > isp_dev->resmem_size) { - v4l2_err(&isp_dev->v4l2_dev, - "resmem size:%zu no enough for head:%d\n", - isp_dev->resmem_size, size); - head->complete = RKISP_TB_NG; + shm_head_poll_timeout(isp_dev, !!head->complete, 5000, 200 * USEC_PER_MSEC); + if (head->complete != RKISP_TB_OK) { + v4l2_err(&isp_dev->v4l2_dev, "wait thunderboot over timeout\n"); + } else { + struct rkisp_isp_params_vdev *params_vdev = &isp_dev->params_vdev; + void *param = NULL; + u32 size = 0, offset = 0; + + switch (isp_dev->isp_ver) { + case ISP_V32: + size = sizeof(struct rkisp32_thunderboot_resmem_head); + offset = size * isp_dev->dev_id; + break; + default: + break; + } + + if (size && size < isp_dev->resmem_size) { + dma_sync_single_for_cpu(isp_dev->dev, isp_dev->resmem_addr + offset, + size, DMA_FROM_DEVICE); + params_vdev->is_first_cfg = true; + if (isp_dev->isp_ver == ISP_V32) { + struct rkisp32_thunderboot_resmem_head *tmp = resmem_va + offset; + + param = &tmp->cfg; + head = &tmp->head; + v4l2_info(&isp_dev->v4l2_dev, + "tb param module en:0x%llx upd:0x%llx cfg upd:0x%llx\n", + tmp->cfg.module_en_update, + tmp->cfg.module_ens, + tmp->cfg.module_cfg_update); } + if (param) + params_vdev->ops->save_first_param(params_vdev, param); + } else if (size > isp_dev->resmem_size) { + v4l2_err(&isp_dev->v4l2_dev, + "resmem size:%zu no enough for 
head:%d\n", + isp_dev->resmem_size, size); + head->complete = RKISP_TB_NG; } - memcpy(&isp_dev->tb_head, head, sizeof(*head)); - v4l2_info(&isp_dev->v4l2_dev, - "thunderboot info: %d, %d, %d, %d, %d, %d %d\n", - head->enable, - head->complete, - head->frm_total, - head->hdr_mode, - head->width, - head->height, - head->bus_fmt); + } + memcpy(&isp_dev->tb_head, head, sizeof(*head)); + v4l2_info(&isp_dev->v4l2_dev, + "thunderboot info: %d, %d, %d, %d, %d, %d | %d %d\n", + head->enable, + head->complete, + head->frm_total, + head->hdr_mode, + head->width, + head->height, + head->camera_num, + head->camera_index); - tb_state = RKISP_TB_OK; - if (head->complete != RKISP_TB_OK) { - head->frm_total = 0; - tb_state = RKISP_TB_NG; - } + tb_state = RKISP_TB_OK; + if (head->complete != RKISP_TB_OK) { + head->frm_total = 0; + tb_state = RKISP_TB_NG; + } + if (hw->is_thunderboot) { + rkisp_register_irq(hw); rkisp_tb_set_state(tb_state); rkisp_tb_unprotect_clk(); - rkisp_register_irq(isp_dev->hw_dev); - isp_dev->hw_dev->is_thunderboot = false; - isp_dev->is_thunderboot = false; + hw->is_thunderboot = false; } + isp_dev->is_thunderboot = false; } #endif diff --git a/include/uapi/linux/rkisp2-config.h b/include/uapi/linux/rkisp2-config.h index cb0078ccccf1..efdcd2f4c326 100644 --- a/include/uapi/linux/rkisp2-config.h +++ b/include/uapi/linux/rkisp2-config.h @@ -1977,7 +1977,8 @@ struct rkisp_thunderboot_resmem_head { u16 hdr_mode; u16 width; u16 height; - u32 bus_fmt; + u16 camera_num; + u16 camera_index; u32 exp_time[3]; u32 exp_gain[3]; diff --git a/include/uapi/linux/rkisp32-config.h b/include/uapi/linux/rkisp32-config.h index a49f481579a0..b8d1cd6c73c9 100644 --- a/include/uapi/linux/rkisp32-config.h +++ b/include/uapi/linux/rkisp32-config.h @@ -1467,7 +1467,7 @@ struct rkisp32_isp_stat_buffer { struct rkisp32_thunderboot_resmem_head { struct rkisp_thunderboot_resmem_head head; struct isp32_isp_params_cfg cfg; -}; +} __attribute__ ((packed)); /****************isp32 
lite********************/ From faa709dab9f5f0ddec53ba56384669c6942be252 Mon Sep 17 00:00:00 2001 From: Herman Chen Date: Thu, 21 Apr 2022 18:09:12 +0800 Subject: [PATCH 21/79] video: rockchip: mpp: Add tlb flush Signed-off-by: Herman Chen Change-Id: I4592ae29f05a1db342d2d34682878f4403f02312 --- drivers/video/rockchip/mpp/mpp_iep2.c | 3 +++ drivers/video/rockchip/mpp/mpp_rkvenc.c | 4 ++++ drivers/video/rockchip/mpp/mpp_vdpp.c | 3 +++ drivers/video/rockchip/mpp/mpp_vdpu1.c | 4 ++++ drivers/video/rockchip/mpp/mpp_vdpu2.c | 4 ++++ drivers/video/rockchip/mpp/mpp_vepu1.c | 4 ++++ drivers/video/rockchip/mpp/mpp_vepu2.c | 4 ++++ 7 files changed, 26 insertions(+) diff --git a/drivers/video/rockchip/mpp/mpp_iep2.c b/drivers/video/rockchip/mpp/mpp_iep2.c index fed2fc0effa8..d00516b2bca3 100644 --- a/drivers/video/rockchip/mpp/mpp_iep2.c +++ b/drivers/video/rockchip/mpp/mpp_iep2.c @@ -606,6 +606,9 @@ static int iep2_run(struct mpp_dev *mpp, | IEP2_REG_BUS_ERROR_EN | IEP2_REG_TIMEOUT_EN); + /* flush tlb before starting hardware */ + mpp_iommu_flush_tlb(mpp->iommu_info); + mpp_task_run_begin(mpp_task, timing_en, MPP_WORK_TIMEOUT_DELAY); /* Last, flush the registers */ diff --git a/drivers/video/rockchip/mpp/mpp_rkvenc.c b/drivers/video/rockchip/mpp/mpp_rkvenc.c index e6ab2c639df4..e6f7784b7284 100644 --- a/drivers/video/rockchip/mpp/mpp_rkvenc.c +++ b/drivers/video/rockchip/mpp/mpp_rkvenc.c @@ -468,6 +468,10 @@ static int rkvenc_run(struct mpp_dev *mpp, rkvenc_write_req_backward(mpp, task->reg, s, e, reg_en); } } + + /* flush tlb before starting hardware */ + mpp_iommu_flush_tlb(mpp->iommu_info); + /* init current task */ mpp->cur_task = mpp_task; diff --git a/drivers/video/rockchip/mpp/mpp_vdpp.c b/drivers/video/rockchip/mpp/mpp_vdpp.c index a89e5f1c9515..c0dc382a1872 100644 --- a/drivers/video/rockchip/mpp/mpp_vdpp.c +++ b/drivers/video/rockchip/mpp/mpp_vdpp.c @@ -335,6 +335,9 @@ static int vdpp_run(struct mpp_dev *mpp, } } + /* flush tlb before starting hardware */ + 
mpp_iommu_flush_tlb(mpp->iommu_info); + /* init current task */ mpp->cur_task = mpp_task; diff --git a/drivers/video/rockchip/mpp/mpp_vdpu1.c b/drivers/video/rockchip/mpp/mpp_vdpu1.c index 69039e79e6a5..a4d82ac1b990 100644 --- a/drivers/video/rockchip/mpp/mpp_vdpu1.c +++ b/drivers/video/rockchip/mpp/mpp_vdpu1.c @@ -409,6 +409,10 @@ static int vdpu_run(struct mpp_dev *mpp, mpp_write_req(mpp, task->reg, s, e, reg_en); } + + /* flush tlb before starting hardware */ + mpp_iommu_flush_tlb(mpp->iommu_info); + /* init current task */ mpp->cur_task = mpp_task; diff --git a/drivers/video/rockchip/mpp/mpp_vdpu2.c b/drivers/video/rockchip/mpp/mpp_vdpu2.c index 34191b510845..d5a6a3017208 100644 --- a/drivers/video/rockchip/mpp/mpp_vdpu2.c +++ b/drivers/video/rockchip/mpp/mpp_vdpu2.c @@ -361,6 +361,10 @@ static int vdpu_run(struct mpp_dev *mpp, mpp_write_req(mpp, task->reg, s, e, reg_en); } + + /* flush tlb before starting hardware */ + mpp_iommu_flush_tlb(mpp->iommu_info); + /* init current task */ mpp->cur_task = mpp_task; diff --git a/drivers/video/rockchip/mpp/mpp_vepu1.c b/drivers/video/rockchip/mpp/mpp_vepu1.c index 8c9782d78a1b..18f685a2e948 100644 --- a/drivers/video/rockchip/mpp/mpp_vepu1.c +++ b/drivers/video/rockchip/mpp/mpp_vepu1.c @@ -283,6 +283,10 @@ static int vepu_run(struct mpp_dev *mpp, mpp_write_req(mpp, task->reg, s, e, reg_en); } + + /* flush tlb before starting hardware */ + mpp_iommu_flush_tlb(mpp->iommu_info); + /* init current task */ mpp->cur_task = mpp_task; diff --git a/drivers/video/rockchip/mpp/mpp_vepu2.c b/drivers/video/rockchip/mpp/mpp_vepu2.c index acb1dc0dbfe3..1dbff8bb733b 100644 --- a/drivers/video/rockchip/mpp/mpp_vepu2.c +++ b/drivers/video/rockchip/mpp/mpp_vepu2.c @@ -383,6 +383,10 @@ static int vepu_run(struct mpp_dev *mpp, mpp_write_req(mpp, task->reg, s, e, reg_en); } + + /* flush tlb before starting hardware */ + mpp_iommu_flush_tlb(mpp->iommu_info); + /* init current task */ mpp->cur_task = mpp_task; From 
70c13cc64ab527ad4360334f520699e0ce4ef0a3 Mon Sep 17 00:00:00 2001 From: Chandler Chen Date: Mon, 13 Mar 2023 18:35:34 +0800 Subject: [PATCH 22/79] arm64: dts: rockchip: rk3528: add shootdown-entire for vcodec mmu Signed-off-by: Chandler Chen Change-Id: I57cff6c255f69baec2437b44165ab3e71c89520b --- arch/arm64/boot/dts/rockchip/rk3528.dtsi | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/arch/arm64/boot/dts/rockchip/rk3528.dtsi b/arch/arm64/boot/dts/rockchip/rk3528.dtsi index be4de512d84d..a89f1f139df0 100644 --- a/arch/arm64/boot/dts/rockchip/rk3528.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3528.dtsi @@ -1009,6 +1009,7 @@ clocks = <&cru ACLK_RKVDEC>, <&cru HCLK_RKVDEC>, <&cru CLK_HEVC_CA_RKVDEC>; clock-names = "aclk", "iface", "clk_hevc_cabac"; #iommu-cells = <0>; + rockchip,shootdown-entire; status = "disabled"; }; @@ -1043,6 +1044,7 @@ clocks = <&cru ACLK_RKVENC>, <&cru HCLK_RKVENC>; clock-names = "aclk", "iface"; #iommu-cells = <0>; + rockchip,shootdown-entire; status = "disabled"; }; @@ -1077,6 +1079,7 @@ clock-names = "aclk", "iface"; clocks = <&cru ACLK_VPU>, <&cru HCLK_VPU>; #iommu-cells = <0>; + rockchip,shootdown-entire; status = "disabled"; }; @@ -1216,6 +1219,7 @@ clocks = <&cru ACLK_VDPP>, <&cru HCLK_VDPP>; clock-names = "aclk", "iface"; #iommu-cells = <0>; + rockchip,shootdown-entire; status = "disabled"; }; @@ -1270,6 +1274,7 @@ clock-names = "aclk", "iface"; clocks = <&cru ACLK_JPEG_DECODER>, <&cru HCLK_JPEG_DECODER>; #iommu-cells = <0>; + rockchip,shootdown-entire; status = "disabled"; }; From 7a64b622460d1140daf4d29eedac2bb79dc8ff4c Mon Sep 17 00:00:00 2001 From: Jon Lin Date: Thu, 16 Mar 2023 15:36:13 +0800 Subject: [PATCH 23/79] PCI: rockchip: dw: Support BAR4 for standard ep Change-Id: Ia6182f410681b76f2d7c8225d0d8467c5664452f Signed-off-by: Jon Lin --- .../pci/controller/dwc/pcie-dw-ep-rockchip.c | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/drivers/pci/controller/dwc/pcie-dw-ep-rockchip.c 
b/drivers/pci/controller/dwc/pcie-dw-ep-rockchip.c index b315bb97a70f..943b23d2c35b 100644 --- a/drivers/pci/controller/dwc/pcie-dw-ep-rockchip.c +++ b/drivers/pci/controller/dwc/pcie-dw-ep-rockchip.c @@ -482,23 +482,26 @@ static void rockchip_pcie_resize_bar(struct rockchip_pcie *rockchip) resbar_base = rockchip_pci_find_resbar_capability(rockchip); /* Resize BAR0 4M 32bits, BAR2 64M 64bits-pref */ - bar = 0; + bar = BAR_0; dw_pcie_writel_dbi(pci, resbar_base + 0x4 + bar * 0x8, 0xfffff0); dw_pcie_writel_dbi(pci, resbar_base + 0x8 + bar * 0x8, 0x2c0); - rockchip_pcie_ep_set_bar_flag(rockchip, BAR_0, PCI_BASE_ADDRESS_MEM_TYPE_32); + rockchip_pcie_ep_set_bar_flag(rockchip, bar, PCI_BASE_ADDRESS_MEM_TYPE_32); - bar = 2; + bar = BAR_2; dw_pcie_writel_dbi(pci, resbar_base + 0x4 + bar * 0x8, 0xfffff0); dw_pcie_writel_dbi(pci, resbar_base + 0x8 + bar * 0x8, 0x6c0); - rockchip_pcie_ep_set_bar_flag(rockchip, BAR_2, + rockchip_pcie_ep_set_bar_flag(rockchip, bar, PCI_BASE_ADDRESS_MEM_PREFETCH | PCI_BASE_ADDRESS_MEM_TYPE_64); + bar = BAR_4; + dw_pcie_writel_dbi(pci, resbar_base + 0x4 + bar * 0x8, 0xfffff0); + dw_pcie_writel_dbi(pci, resbar_base + 0x8 + bar * 0x8, 0xc0); + rockchip_pcie_ep_set_bar_flag(rockchip, bar, PCI_BASE_ADDRESS_MEM_TYPE_32); + /* Disable BAR1 BAR4 BAR5*/ - bar = 1; + bar = BAR_1; dw_pcie_writel_dbi(pci, PCIE_TYPE0_HDR_DBI2_OFFSET + 0x10 + bar * 4, 0); - bar = 4; - dw_pcie_writel_dbi(pci, PCIE_TYPE0_HDR_DBI2_OFFSET + 0x10 + bar * 4, 0); - bar = 5; + bar = BAR_5; dw_pcie_writel_dbi(pci, PCIE_TYPE0_HDR_DBI2_OFFSET + 0x10 + bar * 4, 0); } From 462aebc05c2a68ccd8c6308f622cd4c8a61c85a8 Mon Sep 17 00:00:00 2001 From: Alex Wang Date: Tue, 13 Dec 2022 10:36:41 +0800 Subject: [PATCH 24/79] dt-bindings: input: add rockchip ir key defines Change-Id: Ib2badc1396ec6b7e34b12ba77900d26a06a19694 Signed-off-by: Alex Wang --- include/dt-bindings/input/rk-ir.h | 33 +++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) create mode 100644 
include/dt-bindings/input/rk-ir.h diff --git a/include/dt-bindings/input/rk-ir.h b/include/dt-bindings/input/rk-ir.h new file mode 100644 index 000000000000..139fe04c068e --- /dev/null +++ b/include/dt-bindings/input/rk-ir.h @@ -0,0 +1,33 @@ +/* SPDX-License-Identifier: (GPL-2.0+ OR MIT) */ +/* + * Copyright (c) 2022 Rockchip Electronics Co., Ltd. + */ +#ifndef _DT_BINDINGS_RK_IR_H +#define _DT_BINDINGS_RK_IR_H + +/* STB KEYS */ +#define KEY_CHANNEL_UP 249 +#define KEY_CHANNEL_DN 250 +#define KEY_HOME_PAGE 251 +#define KEY_CH_CUT_BACK 252 +#define KEY_DIRECT_SEEDING 253 +#define KEY_REVIEW 254 +#define KEY_ON_DEMAND 255 +#define KEY_INFO1 256 +#define KEY_SOUND1 257 +#define KEY_X1 258 +#define KEY_X2 259 +#define KEY_LOCAL 260 +#define KEY_APPLICATION 261 +#define KEY_POS 262 +#define KEY_GO_BEGINNING 263 +#define KEY_INTERX 264 +#define KEY_FAVORITE 265 +#define KEY_CHANNEL_POS 266 +#define KEY_EVENT 267 +#define KEY_COMM 268 +#define KEY_LAUNCH 269 +#define KEY_TRACK 270 +#define KEY_PORTAL 271 + +#endif From f75576ce538c1bd56262cd586ac20cefca37f2b2 Mon Sep 17 00:00:00 2001 From: Alex Wang Date: Tue, 13 Dec 2022 10:56:50 +0800 Subject: [PATCH 25/79] arm64: dts: rockchip: add rk-stb-ir-keymap.dtsi Change-Id: Iaf102ad2af5e34c6f18df15c5c0fbe7101d6012a Signed-off-by: Alex Wang --- .../boot/dts/rockchip/rk-stb-ir-keymap.dtsi | 394 ++++++++++++++++++ 1 file changed, 394 insertions(+) create mode 100644 arch/arm64/boot/dts/rockchip/rk-stb-ir-keymap.dtsi diff --git a/arch/arm64/boot/dts/rockchip/rk-stb-ir-keymap.dtsi b/arch/arm64/boot/dts/rockchip/rk-stb-ir-keymap.dtsi new file mode 100644 index 000000000000..0c0d9230aa69 --- /dev/null +++ b/arch/arm64/boot/dts/rockchip/rk-stb-ir-keymap.dtsi @@ -0,0 +1,394 @@ +// SPDX-License-Identifier: (GPL-2.0+ OR MIT) +/* + * Copyright (c) 2022 Rockchip Electronics Co., Ltd. 
+ */ +#include <dt-bindings/input/rk-ir.h> + +&pwm3 { + ir_key1 { + rockchip,usercode = <0xff00>; + rockchip,key_table = + <0xf9 KEY_HOME>, + <0xbf KEY_BACK>, + <0xfb KEY_MENU>, + <0xaa KEY_REPLY>, + <0xb9 KEY_UP>, + <0xe9 KEY_DOWN>, + <0xb8 KEY_LEFT>, + <0xea KEY_RIGHT>, + <0xeb KEY_VOLUMEDOWN>, + <0xef KEY_VOLUMEUP>, + <0xf7 KEY_MUTE>, + <0xe7 KEY_POWER>, + <0xfc KEY_POWER>, + <0xa9 KEY_VOLUMEDOWN>, + <0xa8 KEY_PLAYPAUSE>, + <0xe0 KEY_VOLUMEDOWN>, + <0xa5 KEY_VOLUMEDOWN>, + <0xab 183>, + <0xb7 388>, + <0xe8 388>, + <0xf8 184>, + <0xaf 185>, + <0xed KEY_VOLUMEDOWN>, + <0xee 186>, + <0xb3 KEY_VOLUMEDOWN>, + <0xf1 KEY_VOLUMEDOWN>, + <0xf2 KEY_VOLUMEDOWN>, + <0xf3 KEY_SEARCH>, + <0xb4 KEY_VOLUMEDOWN>, + <0xa4 KEY_SETUP>, + <0xbe KEY_SEARCH>; + }; + + /*for IPTV ltjc*/ + ir_key2 { + rockchip,usercode = <0xc43b>; + rockchip,key_table = + <0x7e KEY_REPLY>, + <0x7f KEY_BACK>, + <0x7a KEY_UP>, + <0x78 KEY_DOWN>, + <0x7b KEY_LEFT>, + <0x79 KEY_RIGHT>, + <0x66 KEY_VOLUMEUP>, + <0x65 KEY_VOLUMEDOWN>, + <0x69 KEY_POWER>, + <0x64 KEY_MUTE>, + <0x76 KEY_1>, + <0x75 KEY_2>, + <0x74 KEY_3>, + <0x73 KEY_4>, + <0x72 KEY_5>, + <0x71 KEY_6>, + <0x70 KEY_7>, + <0x6f KEY_8>, + <0x6e KEY_9>, + <0x77 KEY_0>, + <0x7c KEY_PAGEDOWN>, + <0x7d KEY_PAGEUP>, + <0x6a KEY_SETUP>, + <0x68 KEY_CHANNEL_UP>, + <0x67 KEY_CHANNEL_DN>, + <0x39 KEY_PORTAL>, + <0x29 KEY_HOME_PAGE>, + <0x33 KEY_CH_CUT_BACK>, + <0x34 KEY_LOCAL>, + <0x2d KEY_REVIEW>, + <0x2c KEY_ON_DEMAND>, + <0x2b KEY_INFO1>, + <0x2e KEY_DIRECT_SEEDING>, + <0x2d KEY_REVIEW>, + <0x2c KEY_ON_DEMAND>, + <0x2b KEY_INFO1>, + <0x63 KEY_SOUND1>, + <0x6c KEY_X1>, + <0x6d KEY_X2>, + <0x62 KEY_PLAYPAUSE>, + <0x6b KEY_EQUAL>, + <0x61 KEY_FASTFORWARD>, + <0x60 KEY_REWIND>, + <0x3b KEY_STOP>, + <0x35 KEY_BLUE>, + <0x36 KEY_YELLOW>, + <0x37 KEY_GREEN>, + <0x38 KEY_RED>; + }; + + ir_key3 { + rockchip,usercode = <0x1dcc>; + rockchip,key_table = + <0xee KEY_REPLY>, + <0xf0 KEY_BACK>, + <0xf8 KEY_UP>, + <0xbb KEY_DOWN>, + <0xef KEY_LEFT>, + <0xed KEY_RIGHT>, + <0xfc KEY_HOME>, + <0xf1
KEY_VOLUMEUP>, + <0xfd KEY_VOLUMEDOWN>, + <0xb7 KEY_SEARCH>, + <0xff KEY_POWER>, + <0xf3 KEY_MUTE>, + <0xbf KEY_MENU>, + <0xf9 0x191>, + <0xf5 0x192>, + <0xb3 388>, + <0xbe KEY_1>, + <0xba KEY_2>, + <0xb2 KEY_3>, + <0xbd KEY_4>, + <0xf9 KEY_5>, + <0xb1 KEY_6>, + <0xfc KEY_7>, + <0xf8 KEY_8>, + <0xb0 KEY_9>, + <0xb6 KEY_0>, + <0xb5 KEY_BACKSPACE>; + }; + + /* for IPTV */ + ir_key4 { + rockchip,usercode = <0x4db2>; + rockchip,key_table = + <0x31 KEY_REPLY>, + <0x3a KEY_BACK>, + <0x35 KEY_UP>, + <0x2d KEY_DOWN>, + <0x66 KEY_LEFT>, + <0x3e KEY_RIGHT>, + <0x7f KEY_VOLUMEUP>, + <0xfe KEY_VOLUMEDOWN>, + <0x23 KEY_POWER>, + <0x63 KEY_MUTE>, + <0x6d KEY_1>, + <0x6c KEY_2>, + <0x33 KEY_3>, + <0x71 KEY_4>, + <0x70 KEY_5>, + <0x37 KEY_6>, + <0x75 KEY_7>, + <0x74 KEY_8>, + <0x3b KEY_9>, + <0x78 KEY_0>, + <0x73 KEY_PAGEDOWN>, + <0x22 KEY_PAGEUP>, + <0x72 KEY_SETUP>, + <0x7a KEY_CHANNEL_UP>, + <0x79 KEY_CHANNEL_DN>, + <0x77 KEY_HOME_PAGE>, + <0x29 KEY_CH_CUT_BACK>, + <0x32 KEY_DIRECT_SEEDING>, + <0x6e KEY_REVIEW>, + <0x7c KEY_ON_DEMAND>, + <0x3c KEY_INFO1>, + <0x67 KEY_SOUND1>, + <0x25 KEY_X1>, + <0x2f KEY_X2>, + <0x7d KEY_LOCAL>, + <0x6a KEY_PLAYPAUSE>, + <0x0b KEY_EQUAL>; + }; + + /* for CMCC */ + ir_key5 { + rockchip,usercode = <0x1608>; + rockchip,key_table = + <0x4c KEY_REPLY>, + <0x4d KEY_BACK>, + <0x4b KEY_UP>, + <0x4a KEY_DOWN>, + <0x49 KEY_LEFT>, + <0x48 KEY_RIGHT>, + <0x4e KEY_HOME>, + <0x0b KEY_VOLUMEUP>, + <0x0c KEY_VOLUMEDOWN>, + <0x23 KEY_POWER>, + <0x45 KEY_MUTE>, + <0x44 KEY_MENU>, + <0x78 KEY_1>, + <0x77 KEY_2>, + <0x76 KEY_3>, + <0x75 KEY_4>, + <0x74 KEY_5>, + <0x73 KEY_6>, + <0x72 KEY_7>, + <0x71 KEY_8>, + <0x70 KEY_9>, + <0x79 KEY_0>, + <0x43 KEY_EQUAL>, + <0x72 KEY_X1>, + <0x5f KEY_SETUP>, + <0x25 KEY_DIRECT_SEEDING>, + <0x24 KEY_REVIEW>, + <0x21 KEY_ON_DEMAND>, + <0x20 KEY_INFO1>; + }; + + /* rk new remote */ + ir_key6 { + rockchip,usercode = <0xfe01>; + rockchip,key_table = + <0xec KEY_REPLY>, + <0xe6 KEY_BACK>, + <0xe9 KEY_UP>, + <0xe5 KEY_DOWN>, + <0xae 
KEY_LEFT>, + <0xaf KEY_RIGHT>, + <0xee KEY_HOME>, + <0xe7 KEY_VOLUMEUP>, + <0xef KEY_VOLUMEDOWN>, + <0xbf KEY_POWER>, + <0xbe KEY_MUTE>, + <0xb3 KEY_MENU>, + <0xff 388>, + <0xb1 KEY_1>, + <0xf2 KEY_2>, + <0xf3 KEY_3>, + <0xb5 KEY_4>, + <0xf6 KEY_5>, + <0xf7 KEY_6>, + <0xb9 KEY_7>, + <0xfa KEY_8>, + <0xfb KEY_9>, + <0xfe KEY_0>, + <0xbd KEY_EQUAL>, + <0xbc KEY_SETUP>, + <0xf0 KEY_LOCAL>, + <0x0d KEY_DIRECT_SEEDING>, + <0x0c KEY_REVIEW>, + <0x0b KEY_ON_DEMAND>, + <0x0a KEY_INFO1>, + <0x0e KEY_CH_CUT_BACK>; + }; + + /* for IPTV gd */ + ir_key7 { + rockchip,usercode = <0x4cb3>; + rockchip,key_table = + <0x31 KEY_REPLY>, + <0x3a KEY_BACK>, + <0x35 KEY_UP>, + <0x2d KEY_DOWN>, + <0x66 KEY_LEFT>, + <0x3e KEY_RIGHT>, + <0x7f KEY_VOLUMEUP>, + <0x7e KEY_VOLUMEDOWN>, + <0x23 KEY_POWER>, + <0x63 KEY_MUTE>, + <0x6d KEY_1>, + <0x6c KEY_2>, + <0x33 KEY_3>, + <0x71 KEY_4>, + <0x70 KEY_5>, + <0x37 KEY_6>, + <0x75 KEY_7>, + <0x74 KEY_8>, + <0x3b KEY_9>, + <0x78 KEY_0>, + <0x73 KEY_PAGEDOWN>, + <0x22 KEY_PAGEUP>, + <0x72 KEY_SETUP>, + <0x7a KEY_CHANNEL_UP>, + <0x79 KEY_CHANNEL_DN>, + <0x77 KEY_HOME_PAGE>, + <0x29 KEY_CH_CUT_BACK>, + <0x32 KEY_DIRECT_SEEDING>, + <0x6e KEY_REVIEW>, + <0x7c KEY_ON_DEMAND>, + <0x3c KEY_INFO1>, + <0x67 KEY_SOUND1>, + <0x25 KEY_X1>, + <0x2f KEY_X2>, + <0x7d KEY_LOCAL>, + <0x6a KEY_PLAYPAUSE>, + <0x0b KEY_EQUAL>; + }; + + /* for CMCC */ + ir_key8 { + rockchip,usercode = <0xdd22>; + rockchip,key_table = + <0x31 KEY_REPLY>, + <0x6a KEY_BACK>, + <0x35 KEY_UP>, + <0x2d KEY_DOWN>, + <0x66 KEY_LEFT>, + <0x3e KEY_RIGHT>, + <0x7f KEY_VOLUMEUP>, + <0x7e KEY_VOLUMEDOWN>, + <0x23 KEY_POWER>, + <0x63 KEY_MUTE>, + <0x6d KEY_1>, + <0x6c KEY_2>, + <0x33 KEY_3>, + <0x71 KEY_4>, + <0x70 KEY_5>, + <0x37 KEY_6>, + <0x75 KEY_7>, + <0x74 KEY_8>, + <0x3b KEY_9>, + <0x78 KEY_0>, + <0x73 KEY_PAGEDOWN>, + <0x22 KEY_PAGEUP>, + <0x72 KEY_SETUP>, + <0x7a KEY_CHANNEL_UP>, + <0x79 KEY_CHANNEL_DN>, + <0x77 KEY_HOME_PAGE>, + <0x2f KEY_CH_CUT_BACK>, + <0x32 KEY_DIRECT_SEEDING>, + <0x6e 
KEY_REVIEW>, + <0x7c KEY_ON_DEMAND>, + <0x3c KEY_INFO1>, + <0x3a KEY_HELP>, + <0x67 KEY_SOUND1>, + <0x25 KEY_X2>, + <0x7d KEY_MENU>, + <0x3f KEY_EQUAL>, + <0x29 388>, + <0x26 KEY_PLAYPAUSE>, + <0x76 401>, + <0x7b 400>, + <0x69 66>; + }; + + /* for BJLT IPTV */ + ir_key9 { + rockchip,usercode = <0x3bc4>; + rockchip,key_table = + <0x81 KEY_REPLY>, + <0x80 KEY_BACK>, + <0x85 KEY_UP>, + <0x87 KEY_DOWN>, + <0x84 KEY_LEFT>, + <0x86 KEY_RIGHT>, + <0x99 KEY_VOLUMEUP>, + <0x9a KEY_VOLUMEDOWN>, + <0x96 KEY_POWER>, + <0x9b KEY_MUTE>, + <0x89 KEY_1>, + <0x8a KEY_2>, + <0x8b KEY_3>, + <0x8c KEY_4>, + <0x8d KEY_5>, + <0x8e KEY_6>, + <0x8f KEY_7>, + <0x90 KEY_8>, + <0x91 KEY_9>, + <0x88 KEY_0>, + <0x83 KEY_PAGEDOWN>, + <0x82 KEY_PAGEUP>, + <0x95 KEY_SETUP>, + <0x97 KEY_CHANNEL_UP>, + <0x98 KEY_CHANNEL_DN>, + <0xc6 KEY_LOCAL>, + <0xd6 KEY_HOME_PAGE>, + <0xd7 KEY_TRACK>, + <0xcc KEY_CH_CUT_BACK>, + <0xc3 KEY_INTERX>, + <0xd1 KEY_DIRECT_SEEDING>, + <0xd2 KEY_REVIEW>, + <0xd3 KEY_ON_DEMAND>, + <0xd4 KEY_INFO1>, + <0xc7 KEY_DIRECT_SEEDING>, + <0xc8 KEY_REVIEW>, + <0xc9 KEY_ON_DEMAND>, + <0xca KEY_INFO1>, + <0xcd KEY_FAVORITE>, + <0xce KEY_CHANNEL_POS>, + <0xcf KEY_HELP>, + <0xd0 KEY_EVENT>, + <0x9c KEY_SOUND1>, + <0x93 KEY_X1>, + <0x92 KEY_X2>, + <0xc0 KEY_END>, + <0xc1 KEY_GO_BEGINNING>, + <0x9d KEY_PLAYPAUSE>, + <0xc4 KEY_STOP>, + <0x94 KEY_EQUAL>, + <0x9e KEY_YELLOW>, + <0x9f KEY_BLUE>, + <0xcb KEY_APPLICATION>, + <0xc5 KEY_POS>; + }; +}; From 0bd00b2676d4e59e7bba7876215d5c834d98191a Mon Sep 17 00:00:00 2001 From: Alex Wang Date: Tue, 13 Dec 2022 10:24:23 +0800 Subject: [PATCH 26/79] arm64: dts: rockchip: rk3528-demo: include rk-stb-ir-keymap Change-Id: Id88e0bd5b219b9e99636a01a17c25d18c0fb5886 Signed-off-by: Alex Wang --- arch/arm64/boot/dts/rockchip/rk3528-demo.dtsi | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm64/boot/dts/rockchip/rk3528-demo.dtsi b/arch/arm64/boot/dts/rockchip/rk3528-demo.dtsi index 1362d726ea4d..6af7ab9d871d 100644 --- 
a/arch/arm64/boot/dts/rockchip/rk3528-demo.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3528-demo.dtsi @@ -8,6 +8,7 @@ #include #include #include +#include "rk-stb-ir-keymap.dtsi" / { acodec_sound: acodec-sound { From e4423ba4ce7634e3714c2a5e67312d99a0dab2a6 Mon Sep 17 00:00:00 2001 From: Cai YiWei Date: Thu, 23 Feb 2023 11:54:06 +0800 Subject: [PATCH 27/79] media: rockchip: isp: support buf early done Change-Id: Icfa088b48105089c4a77aeeebd6b8358d1e90bef Signed-off-by: Cai YiWei --- drivers/media/platform/rockchip/isp/capture.c | 22 +++- drivers/media/platform/rockchip/isp/capture.h | 5 +- .../media/platform/rockchip/isp/capture_v1x.c | 10 +- .../media/platform/rockchip/isp/capture_v20.c | 20 ++-- .../media/platform/rockchip/isp/capture_v21.c | 16 +-- .../media/platform/rockchip/isp/capture_v30.c | 54 ++++++---- .../media/platform/rockchip/isp/capture_v32.c | 101 ++++++++++-------- drivers/media/platform/rockchip/isp/dev.h | 1 + .../platform/rockchip/isp/isp_params_v3x.c | 8 -- .../platform/rockchip/isp/isp_stats_v3x.c | 7 +- drivers/media/platform/rockchip/isp/rkisp.c | 45 +++++++- 11 files changed, 185 insertions(+), 104 deletions(-) diff --git a/drivers/media/platform/rockchip/isp/capture.c b/drivers/media/platform/rockchip/isp/capture.c index 115fb85eb5e6..cbb45aa8da42 100644 --- a/drivers/media/platform/rockchip/isp/capture.c +++ b/drivers/media/platform/rockchip/isp/capture.c @@ -166,7 +166,7 @@ int hdr_update_dmatx_buf(struct rkisp_device *dev) for (i = RKISP_STREAM_DMATX0; i <= RKISP_STREAM_DMATX2; i++) { dmatx = &dev->cap_dev.stream[i]; if (dmatx->ops && dmatx->ops->frame_end) - dmatx->ops->frame_end(dmatx); + dmatx->ops->frame_end(dmatx, FRAME_INIT); } if (dev->dmarx_dev.trigger) @@ -455,6 +455,26 @@ int rkisp_stream_frame_start(struct rkisp_device *dev, u32 isp_mis) return 0; } +void rkisp_stream_buf_done_early(struct rkisp_device *dev) +{ + struct rkisp_stream *stream; + int i; + + if (!dev->cap_dev.is_done_early) + return; + + for (i = 0; i < 
RKISP_MAX_STREAM; i++) { + if (i == RKISP_STREAM_VIR || i == RKISP_STREAM_LUMA || + i == RKISP_STREAM_DMATX0 || i == RKISP_STREAM_DMATX1 || + i == RKISP_STREAM_DMATX2 || i == RKISP_STREAM_DMATX3) + continue; + stream = &dev->cap_dev.stream[i]; + if (stream->streaming && !stream->stopping && + stream->ops && stream->ops->frame_end) + stream->ops->frame_end(stream, FRAME_WORK); + } +} + struct stream_config rkisp_mp_stream_config = { /* constraints */ .max_rsz_width = STREAM_MAX_MP_RSZ_OUTPUT_WIDTH, diff --git a/drivers/media/platform/rockchip/isp/capture.h b/drivers/media/platform/rockchip/isp/capture.h index 04e90d628311..b76c075d90fc 100644 --- a/drivers/media/platform/rockchip/isp/capture.h +++ b/drivers/media/platform/rockchip/isp/capture.h @@ -219,7 +219,7 @@ struct streams_ops { void (*set_data_path)(struct rkisp_stream *stream); bool (*is_stream_stopped)(struct rkisp_stream *stream); void (*update_mi)(struct rkisp_stream *stream); - int (*frame_end)(struct rkisp_stream *stream); + int (*frame_end)(struct rkisp_stream *stream, u32 state); int (*frame_start)(struct rkisp_stream *stream, u32 mis); int (*set_wrap)(struct rkisp_stream *stream, int line); }; @@ -280,7 +280,7 @@ struct rkisp_stream { bool is_pause; bool is_crop_upd; bool is_using_resmem; - bool is_tb_s_info; + bool frame_early; wait_queue_head_t done; unsigned int burst; atomic_t sequence; @@ -322,6 +322,7 @@ extern struct stream_config rkisp_mp_stream_config; extern struct stream_config rkisp_sp_stream_config; extern struct rockit_isp_ops rockit_isp_ops; +void rkisp_stream_buf_done_early(struct rkisp_device *dev); void rkisp_stream_buf_done(struct rkisp_stream *stream, struct rkisp_buffer *buf); void rkisp_unregister_stream_vdev(struct rkisp_stream *stream); diff --git a/drivers/media/platform/rockchip/isp/capture_v1x.c b/drivers/media/platform/rockchip/isp/capture_v1x.c index 3c1df5e5273d..a9026e2a2ee6 100644 --- a/drivers/media/platform/rockchip/isp/capture_v1x.c +++ 
b/drivers/media/platform/rockchip/isp/capture_v1x.c @@ -15,7 +15,7 @@ #define CIF_ISP_REQ_BUFS_MIN 0 -static int mi_frame_end(struct rkisp_stream *stream); +static int mi_frame_end(struct rkisp_stream *stream, u32 state); static void rkisp_buf_queue(struct vb2_buffer *vb); static int rkisp_create_dummy_buf(struct rkisp_stream *stream); @@ -517,7 +517,7 @@ static int mp_config_mi(struct rkisp_stream *stream) mp_mi_ctrl_autoupdate_en(base); /* set up first buffer */ - mi_frame_end(stream); + mi_frame_end(stream, FRAME_INIT); return 0; } @@ -594,7 +594,7 @@ static int sp_config_mi(struct rkisp_stream *stream) sp_mi_ctrl_autoupdate_en(base); /* set up first buffer */ - mi_frame_end(stream); + mi_frame_end(stream, FRAME_INIT); return 0; } @@ -708,7 +708,7 @@ static struct streams_ops rkisp_sp_streams_ops = { * is processing and we should set up buffer for next-next frame, * otherwise it will overflow. */ -static int mi_frame_end(struct rkisp_stream *stream) +static int mi_frame_end(struct rkisp_stream *stream, u32 state) { struct rkisp_device *dev = stream->ispdev; struct capture_fmt *isp_fmt = &stream->out_isp_fmt; @@ -1262,7 +1262,7 @@ void rkisp_mi_v1x_isr(u32 mis_val, struct rkisp_device *dev) wake_up(&stream->done); } } else { - mi_frame_end(stream); + mi_frame_end(stream, FRAME_IRQ); } } } diff --git a/drivers/media/platform/rockchip/isp/capture_v20.c b/drivers/media/platform/rockchip/isp/capture_v20.c index ec766d08d807..209298197978 100644 --- a/drivers/media/platform/rockchip/isp/capture_v20.c +++ b/drivers/media/platform/rockchip/isp/capture_v20.c @@ -15,7 +15,7 @@ #define CIF_ISP_REQ_BUFS_MIN 0 -static int mi_frame_end(struct rkisp_stream *stream); +static int mi_frame_end(struct rkisp_stream *stream, u32 state); static void rkisp_buf_queue(struct vb2_buffer *vb); static const struct capture_fmt mp_fmts[] = { @@ -761,7 +761,7 @@ static int mp_config_mi(struct rkisp_stream *stream) mp_mi_ctrl_autoupdate_en(base); /* set up first buffer */ - 
mi_frame_end(stream); + mi_frame_end(stream, FRAME_INIT); return 0; } @@ -848,7 +848,7 @@ static int sp_config_mi(struct rkisp_stream *stream) sp_mi_ctrl_autoupdate_en(base); /* set up first buffer */ - mi_frame_end(stream); + mi_frame_end(stream, FRAME_INIT); return 0; } @@ -884,7 +884,7 @@ static int dmatx3_config_mi(struct rkisp_stream *stream) stream->memory | SW_CSI_RAW_WR_EN_ORG); mi_set_y_size(stream, in_size); - mi_frame_end(stream); + mi_frame_end(stream, FRAME_INIT); mi_frame_end_int_enable(stream); mi_wr_ctrl2(base, SW_RAW3_WR_AUTOUPD); mi_raw_length(stream); @@ -930,7 +930,7 @@ static int dmatx2_config_mi(struct rkisp_stream *stream) val |= SW_CSI_RAW_WR_EN_ORG; raw_wr_ctrl(stream, val); mi_set_y_size(stream, in_size); - mi_frame_end(stream); + mi_frame_end(stream, FRAME_INIT); mi_frame_end_int_enable(stream); mi_wr_ctrl2(base, SW_RAW2_WR_AUTOUPD); mi_raw_length(stream); @@ -974,7 +974,7 @@ static int dmatx1_config_mi(struct rkisp_stream *stream) val |= SW_CSI_RAW_WR_EN_ORG; raw_wr_ctrl(stream, val); mi_set_y_size(stream, in_size); - mi_frame_end(stream); + mi_frame_end(stream, FRAME_INIT); mi_frame_end_int_enable(stream); mi_wr_ctrl2(base, SW_RAW1_WR_AUTOUPD); mi_raw_length(stream); @@ -1022,7 +1022,7 @@ static int dmatx0_config_mi(struct rkisp_stream *stream) val |= SW_CSI_RAW_WR_EN_ORG; raw_wr_ctrl(dmatx, val); mi_set_y_size(dmatx, in_size); - mi_frame_end(dmatx); + mi_frame_end(dmatx, FRAME_INIT); mi_frame_end_int_enable(dmatx); mi_wr_ctrl2(base, SW_RAW0_WR_AUTOUPD); mi_raw_length(stream); @@ -1353,7 +1353,7 @@ RDBK_FRM_UNMATCH: * is processing and we should set up buffer for next-next frame, * otherwise it will overflow. 
*/ -static int mi_frame_end(struct rkisp_stream *stream) +static int mi_frame_end(struct rkisp_stream *stream, u32 state) { struct rkisp_device *dev = stream->ispdev; struct rkisp_capture_device *cap = &dev->cap_dev; @@ -2168,7 +2168,7 @@ void rkisp_stop_spstream(struct rkisp_stream *stream) void rkisp_update_spstream_buf(struct rkisp_stream *stream) { if (stream->id == RKISP_STREAM_SP && stream->out_isp_fmt.fmt_type == FMT_FBCGAIN) - mi_frame_end(stream); + mi_frame_end(stream, FRAME_INIT); } /**************** Interrupter Handler ****************/ @@ -2222,7 +2222,7 @@ void rkisp_mi_v20_isr(u32 mis_val, struct rkisp_device *dev) end_tx2 = false; } } else { - mi_frame_end(stream); + mi_frame_end(stream, FRAME_IRQ); if (dev->dmarx_dev.trigger == T_AUTO && ((dev->hdr.op_mode == HDR_RDBK_FRAME1 && end_tx2) || (dev->hdr.op_mode == HDR_RDBK_FRAME2 && end_tx2 && end_tx0) || diff --git a/drivers/media/platform/rockchip/isp/capture_v21.c b/drivers/media/platform/rockchip/isp/capture_v21.c index 75eff1643ee7..79836510f56e 100644 --- a/drivers/media/platform/rockchip/isp/capture_v21.c +++ b/drivers/media/platform/rockchip/isp/capture_v21.c @@ -15,7 +15,7 @@ #define CIF_ISP_REQ_BUFS_MIN 0 -static int mi_frame_end(struct rkisp_stream *stream); +static int mi_frame_end(struct rkisp_stream *stream, u32 state); static void rkisp_buf_queue(struct vb2_buffer *vb); static const struct capture_fmt mp_fmts[] = { @@ -720,7 +720,7 @@ static int mp_config_mi(struct rkisp_stream *stream) stream->out_isp_fmt.write_format, false); mi_frame_end_int_enable(stream); /* set up first buffer */ - mi_frame_end(stream); + mi_frame_end(stream, FRAME_INIT); return 0; } @@ -794,7 +794,7 @@ static int sp_config_mi(struct rkisp_stream *stream) CIF_MI_SP_AUTOUPDATE_ENABLE, false); mi_frame_end_int_enable(stream); /* set up first buffer */ - mi_frame_end(stream); + mi_frame_end(stream, FRAME_INIT); return 0; } @@ -823,7 +823,7 @@ static int dmatx3_config_mi(struct rkisp_stream *stream) 
stream->out_fmt.height); raw_wr_set_pic_offs(stream, 0); mi_set_y_size(stream, in_size); - mi_frame_end(stream); + mi_frame_end(stream, FRAME_INIT); mi_frame_end_int_enable(stream); mi_wr_ctrl2(base, SW_RAW3_WR_AUTOUPD); mi_raw_length(stream); @@ -869,7 +869,7 @@ static int dmatx2_config_mi(struct rkisp_stream *stream) stream->out_fmt.height); raw_wr_set_pic_offs(stream, 0); mi_set_y_size(stream, in_size); - mi_frame_end(stream); + mi_frame_end(stream, FRAME_INIT); mi_frame_end_int_enable(stream); mi_wr_ctrl2(base, SW_RAW1_WR_AUTOUPD); mi_raw_length(stream); @@ -915,7 +915,7 @@ static int dmatx0_config_mi(struct rkisp_stream *stream) stream->out_fmt.height); raw_wr_set_pic_offs(stream, 0); mi_set_y_size(stream, in_size); - mi_frame_end(stream); + mi_frame_end(stream, FRAME_INIT); mi_frame_end_int_enable(stream); mi_wr_ctrl2(base, SW_RAW0_WR_AUTOUPD); mi_raw_length(stream); @@ -1197,7 +1197,7 @@ RDBK_FRM_UNMATCH: * is processing and we should set up buffer for next-next frame, * otherwise it will overflow. 
*/ -static int mi_frame_end(struct rkisp_stream *stream) +static int mi_frame_end(struct rkisp_stream *stream, u32 state) { struct rkisp_device *dev = stream->ispdev; struct rkisp_capture_device *cap = &dev->cap_dev; @@ -1940,7 +1940,7 @@ void rkisp_mi_v21_isr(u32 mis_val, struct rkisp_device *dev) end_tx2 = false; } } else { - mi_frame_end(stream); + mi_frame_end(stream, FRAME_IRQ); if (dev->dmarx_dev.trigger == T_AUTO && ((dev->hdr.op_mode == HDR_RDBK_FRAME1 && end_tx2) || (dev->hdr.op_mode == HDR_RDBK_FRAME2 && end_tx2 && end_tx0))) { diff --git a/drivers/media/platform/rockchip/isp/capture_v30.c b/drivers/media/platform/rockchip/isp/capture_v30.c index 56564a3f9bac..c3ef571ff2e9 100644 --- a/drivers/media/platform/rockchip/isp/capture_v30.c +++ b/drivers/media/platform/rockchip/isp/capture_v30.c @@ -15,7 +15,7 @@ #define CIF_ISP_REQ_BUFS_MIN 0 -static int mi_frame_end(struct rkisp_stream *stream); +static int mi_frame_end(struct rkisp_stream *stream, u32 state); static int mi_frame_start(struct rkisp_stream *stream, u32 mis); static const struct capture_fmt mp_fmts[] = { @@ -521,7 +521,7 @@ static int mp_config_mi(struct rkisp_stream *stream) mi_frame_end_int_enable(stream); /* set up first buffer */ - mi_frame_end(stream); + mi_frame_end(stream, FRAME_INIT); return 0; } @@ -614,7 +614,7 @@ static int sp_config_mi(struct rkisp_stream *stream) mi_frame_end_int_enable(stream); /* set up first buffer */ - mi_frame_end(stream); + mi_frame_end(stream, FRAME_INIT); return 0; } @@ -642,7 +642,7 @@ static int fbc_config_mi(struct rkisp_stream *stream) false, is_unite); mi_frame_end_int_enable(stream); /* set up first buffer */ - mi_frame_end(stream); + mi_frame_end(stream, FRAME_INIT); return 0; } @@ -685,7 +685,7 @@ static int bp_config_mi(struct rkisp_stream *stream) rkisp_unite_set_bits(dev, ISP3X_MI_WR_CTRL, 0, val, false, is_unite); mi_frame_end_int_enable(stream); /* set up first buffer */ - mi_frame_end(stream); + mi_frame_end(stream, FRAME_INIT); return 0; } @@ 
-951,19 +951,37 @@ static int mi_frame_start(struct rkisp_stream *stream, u32 mis) * is processing and we should set up buffer for next-next frame, * otherwise it will overflow. */ -static int mi_frame_end(struct rkisp_stream *stream) +static int mi_frame_end(struct rkisp_stream *stream, u32 state) { struct rkisp_device *dev = stream->ispdev; struct capture_fmt *isp_fmt = &stream->out_isp_fmt; + struct rkisp_buffer *buf = NULL; unsigned long lock_flags = 0; int i = 0; if (stream->id == RKISP_STREAM_VIR) return 0; - if (stream->curr_buf) { + if (dev->cap_dev.is_done_early && + (state == FRAME_IRQ || state == FRAME_WORK)) { + spin_lock_irqsave(&stream->vbq_lock, lock_flags); + if (state == FRAME_IRQ && stream->curr_buf) + stream->frame_early = false; + else + stream->frame_early = true; + buf = stream->curr_buf; + stream->curr_buf = NULL; + spin_unlock_irqrestore(&stream->vbq_lock, lock_flags); + if ((!stream->frame_early && state == FRAME_WORK) || + (stream->frame_early && state == FRAME_IRQ)) + goto end; + } else { + buf = stream->curr_buf; + } + + if (buf) { struct rkisp_stream *vir = &dev->cap_dev.stream[RKISP_STREAM_VIR]; - struct vb2_buffer *vb2_buf = &stream->curr_buf->vb.vb2_buf; + struct vb2_buffer *vb2_buf = &buf->vb.vb2_buf; u64 ns = 0; /* Dequeue a filled buffer */ @@ -974,7 +992,7 @@ static int mi_frame_end(struct rkisp_stream *stream) } rkisp_dmarx_get_frame(dev, &i, NULL, &ns, true); - stream->curr_buf->vb.sequence = i; + buf->vb.sequence = i; if (!ns) ns = ktime_get_ns(); vb2_buf->timestamp = ns; @@ -982,34 +1000,34 @@ static int mi_frame_end(struct rkisp_stream *stream) ns = ktime_get_ns(); stream->dbg.interval = ns - stream->dbg.timestamp; stream->dbg.timestamp = ns; - stream->dbg.id = stream->curr_buf->vb.sequence; + stream->dbg.id = buf->vb.sequence; stream->dbg.delay = ns - dev->isp_sdev.frm_timestamp; if (vir->streaming && vir->conn_id == stream->id) { spin_lock_irqsave(&vir->vbq_lock, lock_flags); - list_add_tail(&stream->curr_buf->queue, + 
list_add_tail(&buf->queue, &dev->cap_dev.vir_cpy.queue); spin_unlock_irqrestore(&vir->vbq_lock, lock_flags); if (!completion_done(&dev->cap_dev.vir_cpy.cmpl)) complete(&dev->cap_dev.vir_cpy.cmpl); } else { - rkisp_stream_buf_done(stream, stream->curr_buf); + rkisp_stream_buf_done(stream, buf); } - - stream->curr_buf = NULL; } +end: + if (state == FRAME_WORK) + return 0; + spin_lock_irqsave(&stream->vbq_lock, lock_flags); stream->curr_buf = stream->next_buf; stream->next_buf = NULL; - spin_lock_irqsave(&stream->vbq_lock, lock_flags); if (!list_empty(&stream->buf_queue)) { stream->next_buf = list_first_entry(&stream->buf_queue, struct rkisp_buffer, queue); list_del(&stream->next_buf->queue); } - spin_unlock_irqrestore(&stream->vbq_lock, lock_flags); - stream->ops->update_mi(stream); + spin_unlock_irqrestore(&stream->vbq_lock, lock_flags); return 0; } @@ -1711,7 +1729,7 @@ void rkisp_mi_v30_isr(u32 mis_val, struct rkisp_device *dev) wake_up(&stream->done); } } else { - mi_frame_end(stream); + mi_frame_end(stream, FRAME_IRQ); } } diff --git a/drivers/media/platform/rockchip/isp/capture_v32.c b/drivers/media/platform/rockchip/isp/capture_v32.c index 2e460ca70fde..519dc05105e3 100644 --- a/drivers/media/platform/rockchip/isp/capture_v32.c +++ b/drivers/media/platform/rockchip/isp/capture_v32.c @@ -27,7 +27,7 @@ #define CIF_ISP_REQ_BUFS_MIN 0 -static int mi_frame_end(struct rkisp_stream *stream); +static int mi_frame_end(struct rkisp_stream *stream, u32 state); static int mi_frame_start(struct rkisp_stream *stream, u32 mis); static int rkisp_create_dummy_buf(struct rkisp_stream *stream); @@ -752,7 +752,7 @@ static int mp_config_mi(struct rkisp_stream *stream) mi_frame_end_int_enable(stream); /* set up first buffer */ - mi_frame_end(stream); + mi_frame_end(stream, FRAME_INIT); rkisp_write(dev, stream->config->mi.y_offs_cnt_init, 0, false); rkisp_write(dev, stream->config->mi.cb_offs_cnt_init, 0, false); @@ -843,7 +843,7 @@ static int sp_config_mi(struct rkisp_stream 
*stream) mi_frame_end_int_enable(stream); /* set up first buffer */ - mi_frame_end(stream); + mi_frame_end(stream, FRAME_INIT); rkisp_write(dev, stream->config->mi.y_offs_cnt_init, 0, false); rkisp_write(dev, stream->config->mi.cb_offs_cnt_init, 0, false); @@ -886,7 +886,7 @@ static int bp_config_mi(struct rkisp_stream *stream) rkisp_set_bits(dev, ISP3X_MI_WR_CTRL, 0, val, false); mi_frame_end_int_enable(stream); /* set up first buffer */ - mi_frame_end(stream); + mi_frame_end(stream, FRAME_INIT); rkisp_write(dev, stream->config->mi.y_offs_cnt_init, 0, false); rkisp_write(dev, stream->config->mi.cb_offs_cnt_init, 0, false); @@ -916,7 +916,7 @@ static int ds_config_mi(struct rkisp_stream *stream) mi_frame_end_int_enable(stream); - mi_frame_end(stream); + mi_frame_end(stream, FRAME_INIT); rkisp_write(dev, stream->config->mi.y_offs_cnt_init, 0, false); rkisp_write(dev, stream->config->mi.cb_offs_cnt_init, 0, false); @@ -1098,7 +1098,6 @@ static void update_mi(struct rkisp_stream *stream) if (dev->tb_addr_idx < dev->tb_stream_info.buf_max - 1) dev->tb_addr_idx++; - stream->is_tb_s_info = true; } else if (!stream->is_pause) { stream->is_pause = true; stream->ops->disable_mi(stream); @@ -1220,7 +1219,7 @@ end: } } -static int luma_frame_end(struct rkisp_stream *stream) +static int luma_frame_end(struct rkisp_stream *stream, u32 state) { struct rkisp_device *dev = stream->ispdev; u32 val; @@ -1364,27 +1363,47 @@ static int mi_frame_start(struct rkisp_stream *stream, u32 mis) * is processing and we should set up buffer for next-next frame, * otherwise it will overflow. 
*/ -static int mi_frame_end(struct rkisp_stream *stream) +static int mi_frame_end(struct rkisp_stream *stream, u32 state) { struct rkisp_device *dev = stream->ispdev; struct capture_fmt *isp_fmt = &stream->out_isp_fmt; unsigned long lock_flags = 0; + struct rkisp_buffer *buf = NULL; u32 i; if (stream->id == RKISP_STREAM_VIR) return 0; - set_mirror_flip(stream); + if (dev->cap_dev.is_done_early && + (state == FRAME_IRQ || state == FRAME_WORK)) { + /* skip mainpath wrap mode */ + if (stream->id == RKISP_STREAM_MP && dev->cap_dev.wrap_line) + return 0; + spin_lock_irqsave(&stream->vbq_lock, lock_flags); + if (state == FRAME_IRQ && stream->curr_buf) + stream->frame_early = false; + else + stream->frame_early = true; + buf = stream->curr_buf; + stream->curr_buf = NULL; + spin_unlock_irqrestore(&stream->vbq_lock, lock_flags); + if ((!stream->frame_early && state == FRAME_WORK) || + (stream->frame_early && state == FRAME_IRQ)) + goto end; + } else { + buf = stream->curr_buf; + } - if (stream->curr_buf) { - struct vb2_buffer *vb2_buf = &stream->curr_buf->vb.vb2_buf; + if (buf) { + struct vb2_buffer *vb2_buf = &buf->vb.vb2_buf; struct rkisp_stream *vir = &dev->cap_dev.stream[RKISP_STREAM_VIR]; + u64 ns = 0; if (dev->skip_frame) { spin_lock_irqsave(&stream->vbq_lock, lock_flags); - list_add_tail(&stream->curr_buf->queue, &stream->buf_queue); + list_add_tail(&buf->queue, &stream->buf_queue); spin_unlock_irqrestore(&stream->vbq_lock, lock_flags); - goto next; + goto end; } for (i = 0; i < isp_fmt->mplanes; i++) { @@ -1393,22 +1412,37 @@ static int mi_frame_end(struct rkisp_stream *stream) vb2_set_plane_payload(vb2_buf, i, payload_size); } + rkisp_dmarx_get_frame(dev, &i, NULL, &ns, true); + if (!ns) + ns = ktime_get_ns(); + buf->vb.sequence = i; + buf->vb.vb2_buf.timestamp = ns; + ns = ktime_get_ns(); + stream->dbg.interval = ns - stream->dbg.timestamp; + stream->dbg.delay = ns - dev->isp_sdev.frm_timestamp; + stream->dbg.timestamp = ns; + stream->dbg.id = i; + if 
(vb2_buf->memory) { if (vir->streaming && vir->conn_id == stream->id) { spin_lock_irqsave(&vir->vbq_lock, lock_flags); - list_add_tail(&stream->curr_buf->queue, + list_add_tail(&buf->queue, &dev->cap_dev.vir_cpy.queue); spin_unlock_irqrestore(&vir->vbq_lock, lock_flags); if (!completion_done(&dev->cap_dev.vir_cpy.cmpl)) complete(&dev->cap_dev.vir_cpy.cmpl); } else { - rkisp_stream_buf_done(stream, stream->curr_buf); + rkisp_stream_buf_done(stream, buf); } } else { rkisp_rockit_buf_done(stream, ROCKIT_DVBM_END); } } -next: + +end: + if (state == FRAME_WORK) + return 0; + set_mirror_flip(stream); spin_lock_irqsave(&stream->vbq_lock, lock_flags); stream->curr_buf = stream->next_buf; stream->next_buf = NULL; @@ -2221,31 +2255,6 @@ void rkisp_mi_v32_isr(u32 mis_val, struct rkisp_device *dev) if (i == RKISP_STREAM_MP) rkisp_dvbm_event(dev, CIF_MI_MP_FRAME); - rkisp_dmarx_get_frame(dev, &seq, NULL, &ns, true); - if (!ns) - ns = ktime_get_ns(); - if (stream->curr_buf) { - stream->curr_buf->vb.sequence = seq; - stream->curr_buf->vb.vb2_buf.timestamp = ns; - - ns = ktime_get_ns(); - stream->dbg.interval = ns - stream->dbg.timestamp; - stream->dbg.delay = ns - dev->isp_sdev.frm_timestamp; - stream->dbg.timestamp = ns; - stream->dbg.id = seq; - } - if (stream->is_tb_s_info) { - struct rkisp_tb_stream_info *tb_info = &dev->tb_stream_info; - u32 idx; - - if (tb_info->buf_cnt < tb_info->buf_max) - tb_info->buf_cnt++; - idx = tb_info->buf_cnt - 1; - dev->tb_stream_info.buf[idx].sequence = seq; - dev->tb_stream_info.buf[idx].timestamp = ns; - stream->is_tb_s_info = false; - } - if (stream->stopping) { /* * Make sure stream is actually stopped, whose state @@ -2265,9 +2274,15 @@ void rkisp_mi_v32_isr(u32 mis_val, struct rkisp_device *dev) stream->streaming = false; wake_up(&stream->done); } + } else if (stream->id == RKISP_STREAM_MP && dev->cap_dev.wrap_line) { + ns = ktime_get_ns(); + rkisp_dmarx_get_frame(dev, &seq, NULL, NULL, true); + stream->dbg.interval = ns - 
stream->dbg.timestamp; + stream->dbg.delay = ns - dev->isp_sdev.frm_timestamp; + stream->dbg.timestamp = ns; + stream->dbg.id = seq; } else { - if (stream->id != RKISP_STREAM_MP || !dev->cap_dev.wrap_line) - mi_frame_end(stream); + mi_frame_end(stream, FRAME_IRQ); } } diff --git a/drivers/media/platform/rockchip/isp/dev.h b/drivers/media/platform/rockchip/isp/dev.h index 37b52d159c49..29af28c86b6c 100644 --- a/drivers/media/platform/rockchip/isp/dev.h +++ b/drivers/media/platform/rockchip/isp/dev.h @@ -253,5 +253,6 @@ struct rkisp_device { u8 multi_mode; u8 multi_index; + u8 rawaf_irq_cnt; }; #endif diff --git a/drivers/media/platform/rockchip/isp/isp_params_v3x.c b/drivers/media/platform/rockchip/isp/isp_params_v3x.c index 31d24dcb1672..87db6aa87554 100644 --- a/drivers/media/platform/rockchip/isp/isp_params_v3x.c +++ b/drivers/media/platform/rockchip/isp/isp_params_v3x.c @@ -1061,14 +1061,6 @@ isp_rawaf_config(struct rkisp_isp_params_vdev *params_vdev, ISP3X_RAWAF_SIZE_WINA + i * 8, id); } - var = 0; - for (i = 0; i < ISP3X_RAWAF_LINE_NUM; i++) { - if (arg->line_en[i]) - var |= ISP3X_RAWAF_INTLINE0_EN << i; - var |= ISP3X_RAWAF_INELINE0(arg->line_num[i]) << 4 * i; - } - isp3_param_write(params_vdev, var, ISP3X_RAWAF_INT_LINE, id); - var = isp3_param_read(params_vdev, ISP3X_RAWAF_THRES, id); var &= ~0xFFFF; var |= arg->afm_thres; diff --git a/drivers/media/platform/rockchip/isp/isp_stats_v3x.c b/drivers/media/platform/rockchip/isp/isp_stats_v3x.c index 3b72a1c339f0..7b21a80b038b 100644 --- a/drivers/media/platform/rockchip/isp/isp_stats_v3x.c +++ b/drivers/media/platform/rockchip/isp/isp_stats_v3x.c @@ -1005,12 +1005,10 @@ rkisp_stats_send_meas_v3x(struct rkisp_isp_stats_vdev *stats_vdev, v4l2_warn(stats_vdev->vnode.vdev.v4l2_dev, "ISP3X_3A_RAWAF_SUM\n"); + ops->get_rawaf_meas(stats_vdev, cur_stat_buf, 0); if (meas_work->isp3a_ris & ISP3X_3A_RAWAWB) ret |= ops->get_rawawb_meas(stats_vdev, cur_stat_buf, 0); - if (meas_work->isp3a_ris & ISP3X_3A_RAWAF) - ret |= 
ops->get_rawaf_meas(stats_vdev, cur_stat_buf, 0); - if (meas_work->isp3a_ris & ISP3X_3A_RAWAE_BIG) ret |= ops->get_rawae3_meas(stats_vdev, cur_stat_buf, 0); @@ -1046,10 +1044,9 @@ rkisp_stats_send_meas_v3x(struct rkisp_isp_stats_vdev *stats_vdev, cur_stat_buf++; cur_stat_buf->frame_id = cur_frame_id; } + ops->get_rawaf_meas(stats_vdev, cur_stat_buf, 1); if (meas_work->isp3a_ris & ISP3X_3A_RAWAWB) ret |= ops->get_rawawb_meas(stats_vdev, cur_stat_buf, 1); - if (meas_work->isp3a_ris & ISP3X_3A_RAWAF) - ret |= ops->get_rawaf_meas(stats_vdev, cur_stat_buf, 1); if (meas_work->isp3a_ris & ISP3X_3A_RAWAE_BIG) ret |= ops->get_rawae3_meas(stats_vdev, cur_stat_buf, 1); if (meas_work->isp3a_ris & ISP3X_3A_RAWHIST_BIG) diff --git a/drivers/media/platform/rockchip/isp/rkisp.c b/drivers/media/platform/rockchip/isp/rkisp.c index 44173416fa60..485f3346d35e 100644 --- a/drivers/media/platform/rockchip/isp/rkisp.c +++ b/drivers/media/platform/rockchip/isp/rkisp.c @@ -2126,6 +2126,30 @@ static int rkisp_isp_start(struct rkisp_device *dev) atomic_read(&dev->hw_dev->refcnt), dev->hw_dev->dev_link_num); + dev->cap_dev.is_done_early = false; + if (dev->cap_dev.wait_line >= dev->isp_sdev.out_crop.height) + dev->cap_dev.wait_line = 0; + if (dev->cap_dev.wait_line) { + dev->cap_dev.is_done_early = true; + if (dev->isp_ver >= ISP_V32) { + val = dev->cap_dev.wait_line; + rkisp_write(dev, ISP32_ISP_IRQ_CFG0, val << 16, false); + rkisp_set_bits(dev, CIF_ISP_IMSC, 0, ISP3X_OUT_FRM_HALF, false); + } else { + /* using AF 15x15 block */ + val = dev->isp_sdev.out_crop.height / 15; + val = dev->cap_dev.wait_line / val; + val = ISP3X_RAWAF_INELINE0(val) | ISP3X_RAWAF_INTLINE0_EN; + rkisp_unite_write(dev, ISP3X_RAWAF_INT_LINE, + val, false, dev->hw_dev->is_unite); + rkisp_unite_set_bits(dev, ISP_ISP3A_IMSC, 0, + ISP2X_3A_RAWAF, false, dev->hw_dev->is_unite); + rkisp_unite_clear_bits(dev, CIF_ISP_IMSC, + ISP2X_LSC_LUT_ERR, false, dev->hw_dev->is_unite); + dev->rawaf_irq_cnt = 0; + } + } + /* Activate 
MIPI */ if (sensor && sensor->mbus.type == V4L2_MBUS_CSI2_DPHY) { if (dev->isp_ver == ISP_V12 || dev->isp_ver == ISP_V13) { @@ -2817,7 +2841,7 @@ static void rkisp_global_update_mi(struct rkisp_device *dev) stream->id == RKISP_STREAM_LUMA) continue; if (stream->streaming && !stream->curr_buf) - stream->ops->frame_end(stream); + stream->ops->frame_end(stream, FRAME_INIT); } } rkisp_stats_next_ddr_config(&dev->stats_vdev); @@ -3968,7 +3992,7 @@ void rkisp_isp_isr(unsigned int isp_mis, ISP2X_3A_RAWHIST_BIG | ISP2X_3A_RAWHIST_CH0 | ISP2X_3A_RAWHIST_CH1 | ISP2X_3A_RAWHIST_CH2 | ISP2X_3A_RAWAF_SUM | ISP2X_3A_RAWAF_LUM | - ISP2X_3A_RAWAF | ISP2X_3A_RAWAWB; + ISP2X_3A_RAWAWB; bool sof_event_later = false; /* @@ -4085,6 +4109,16 @@ vs_skip: } } + if (isp3a_mis & ISP2X_3A_RAWAF) { + writel(ISP3X_3A_RAWAF, base + ISP3X_ISP_3A_ICR); + /* 3a irq will with lsc_lut_err irq if isp version below isp32 */ + if (isp_mis & ISP2X_LSC_LUT_ERR) + isp_mis &= ~ISP2X_LSC_LUT_ERR; + if (dev->rawaf_irq_cnt == 0) + rkisp_stream_buf_done_early(dev); + dev->rawaf_irq_cnt++; + } + if (isp_mis & ISP2X_LSC_LUT_ERR) { writel(ISP2X_LSC_LUT_ERR, base + CIF_ISP_ICR); @@ -4108,6 +4142,7 @@ vs_skip: /* frame was completely put out */ if (isp_mis & CIF_ISP_FRAME) { + dev->rawaf_irq_cnt = 0; if (!dev->is_pre_on || !IS_HDR_RDBK(dev->rd_mode)) dev->isp_sdev.dbg.interval = ktime_get_ns() - dev->isp_sdev.dbg.timestamp; @@ -4164,7 +4199,7 @@ vs_skip: if (dev->hw_dev->isp_ver == ISP_V32) { struct rkisp_stream *s = &dev->cap_dev.stream[RKISP_STREAM_LUMA]; - s->ops->frame_end(s); + s->ops->frame_end(s, FRAME_IRQ); } if (dev->procfs.is_fe_wait) { dev->procfs.is_fe_wait = false; @@ -4177,7 +4212,8 @@ vs_skip: * lot of register writes. Do those only one per frame. * Do the updates in the order of the processing flow. 
*/ - rkisp_params_isr(&dev->params_vdev, isp_mis); + if (isp_mis & (CIF_ISP_V_START | CIF_ISP_FRAME)) + rkisp_params_isr(&dev->params_vdev, isp_mis); /* cur frame end and next frame start irq togeter */ if (dev->vs_irq < 0 && sof_event_later) { @@ -4193,6 +4229,7 @@ vs_skip: if (isp_mis & ISP3X_OUT_FRM_HALF) { writel(ISP3X_OUT_FRM_HALF, base + CIF_ISP_ICR); rkisp_dvbm_event(dev, ISP3X_OUT_FRM_HALF); + rkisp_stream_buf_done_early(dev); } if (isp_mis & ISP3X_OUT_FRM_END) { writel(ISP3X_OUT_FRM_END, base + CIF_ISP_ICR); From d19bd18e3ef92d5b4cd3e2aa2640a2ec1d299191 Mon Sep 17 00:00:00 2001 From: Wangqiang Guo Date: Fri, 3 Mar 2023 06:43:31 +0000 Subject: [PATCH 28/79] media: i2c: sc850sl: add camera sensor driver. Change-Id: If3bec0f4a6458ce664ba073565c28d0f9256350d Signed-off-by: Wangqiang Guo --- drivers/media/i2c/Kconfig | 10 + drivers/media/i2c/Makefile | 1 + drivers/media/i2c/sc850sl.c | 1776 +++++++++++++++++++++++++++++++++++ 3 files changed, 1787 insertions(+) create mode 100644 drivers/media/i2c/sc850sl.c diff --git a/drivers/media/i2c/Kconfig b/drivers/media/i2c/Kconfig index 2a54b9213f46..41ca537a6b40 100644 --- a/drivers/media/i2c/Kconfig +++ b/drivers/media/i2c/Kconfig @@ -2078,6 +2078,16 @@ config VIDEO_SC530AI This is a Video4Linux2 sensor driver for the SmartSens SC530AI camera. +config VIDEO_SC850SL + tristate "SmartSens SC850SL sensor support" + depends on I2C && VIDEO_V4L2 + select MEDIA_CONTROLLER + select VIDEO_V4L2_SUBDEV_API + select V4L2_FWNODE + help + This is a Video4Linux2 sensor driver for the SmartSens + SC850SL camera. 
+ config VIDEO_SENSOR_ADAPTER tristate "Rockchip sensor driver adapter" depends on I2C && VIDEO_V4L2 diff --git a/drivers/media/i2c/Makefile b/drivers/media/i2c/Makefile index d96405c9a077..5dd19e4b55d4 100644 --- a/drivers/media/i2c/Makefile +++ b/drivers/media/i2c/Makefile @@ -143,6 +143,7 @@ obj-$(CONFIG_VIDEO_SC4336) += sc4336.o obj-$(CONFIG_VIDEO_SC500AI) += sc500ai.o obj-$(CONFIG_VIDEO_SC501AI) += sc501ai.o obj-$(CONFIG_VIDEO_SC530AI) += sc530ai.o +obj-$(CONFIG_VIDEO_SC850SL) += sc850sl.o obj-$(CONFIG_VIDEO_SENSOR_ADAPTER) += sensor_adapter.o obj-$(CONFIG_VIDEO_SR030PC30) += sr030pc30.o obj-$(CONFIG_VIDEO_NOON010PC30) += noon010pc30.o diff --git a/drivers/media/i2c/sc850sl.c b/drivers/media/i2c/sc850sl.c new file mode 100644 index 000000000000..14a1e548584c --- /dev/null +++ b/drivers/media/i2c/sc850sl.c @@ -0,0 +1,1776 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * sc850sl driver + * + * Copyright (C) 2023 Rockchip Electronics Co., Ltd. + * + * V0.0X01.0X01 first version + */ + +//#define DEBUG +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "../platform/rockchip/isp/rkisp_tb_helper.h" + +#define DRIVER_VERSION KERNEL_VERSION(0, 0x01, 0x01) + +#ifndef V4L2_CID_DIGITAL_GAIN +#define V4L2_CID_DIGITAL_GAIN V4L2_CID_GAIN +#endif + +#define MIPI_FREQ_540M 540000000 + +#define SC850SL_MAX_PIXEL_RATE (MIPI_FREQ_540M / 10 * 2 * SC850SL_4LANES) +#define OF_CAMERA_HDR_MODE "rockchip,camera-hdr-mode" + +#define SC850SL_XVCLK_FREQ_24M 24000000 + +/* TODO: Get the real chip id from reg */ +#define CHIP_ID 0x9D1E +#define SC850SL_REG_CHIP_ID 0x3107 + +#define SC850SL_REG_CTRL_MODE 0x0100 +#define SC850SL_MODE_SW_STANDBY 0x0 +#define SC850SL_MODE_STREAMING BIT(0) + +/*expo*/ +#define SC850SL_EXPOSURE_MIN 2 /*okay*/ +#define SC850SL_EXPOSURE_STEP 1 /*okay*/ +#define SC850SL_VTS_MAX 0xffff /*okay*/ + +//long exposure +#define 
SC850SL_REG_EXP_LONG_H 0x3e00 //[3:0] +#define SC850SL_REG_EXP_LONG_M 0x3e01 //[7:0] +#define SC850SL_REG_EXP_LONG_L 0x3e02 //[7:4] + +//short exposure //for hdr +#define SC850SL_REG_EXP_SF_H 0x3e22 +#define SC850SL_REG_EXP_SF_M 0x3e04 //[7:0] +#define SC850SL_REG_EXP_SF_L 0x3e05 //[7:4] + +#define SC850SL_FETCH_EXP_H(VAL) (((VAL) >> 12) & 0xF) +#define SC850SL_FETCH_EXP_M(VAL) (((VAL) >> 4) & 0xFF) +#define SC850SL_FETCH_EXP_L(VAL) (((VAL) & 0xF) << 4) + +/*gain*/ +//long frame and normal gain reg +#define SC850SL_REG_DGAIN 0x3e06 +#define SC850SL_REG_AGAIN 0x3e08 +#define SC850SL_REG_AGAIN_FINE 0x3e09 +//#define SC850SL_REG_DGAIN_FINE 0x3e07 + +//short frame gain reg +#define SC850SL_SF_REG_AGAIN 0x3e12 +#define SC850SL_SF_REG_AGAIN_FINE 0x3e13 +#define SC850SL_SF_REG_DGAIN 0x3e10 + +#define SC850SL_GAIN_MIN 0x40 //1.000 = 64 * 1/64 +#define SC850SL_GAIN_MAX (8 * 50 * 64) /*need_view 8*50*64=25600 */ +#define SC850SL_GAIN_STEP 1 +#define SC850SL_GAIN_DEFAULT 0x40 + +#define SC850SL_REG_VTS 0x320e + +//group hold +#define SC850SL_GROUP_UPDATE_ADDRESS 0x3800 +#define SC850SL_GROUP_UPDATE_START_DATA 0x00 +#define SC850SL_GROUP_UPDATE_LAUNCH 0x30 + +#define SC850SL_SOFTWARE_RESET_REG 0x0103 +#define SC850SL_REG_TEST_PATTERN 0x4501 +#define SC850SL_TEST_PATTERN_ENABLE 0x08 + +#define SC850SL_FLIP_REG 0x3221 +#define SC850SL_FLIP_MASK 0x60 +#define SC850SL_MIRROR_MASK 0x06 + +#define REG_NULL 0xFFFF + +#define SC850SL_REG_VALUE_08BIT 1 +#define SC850SL_REG_VALUE_16BIT 2 +#define SC850SL_REG_VALUE_24BIT 3 + +#define SC850SL_4LANES 4 + +#define OF_CAMERA_PINCTRL_STATE_DEFAULT "rockchip,camera_default" +#define OF_CAMERA_PINCTRL_STATE_SLEEP "rockchip,camera_sleep" + +#define SC850SL_NAME "sc850sl" + + +static const char * const sc850sl_supply_names[] = { + "dvdd", // Digital core power + "dovdd", // Digital I/O power + "avdd", // Analog power +}; +#define SC850SL_NUM_SUPPLIES ARRAY_SIZE(sc850sl_supply_names) + +struct regval { + u16 addr; + u8 val; +}; + +struct
sc850sl_mode { + u32 bus_fmt; + u32 width; + u32 height; + struct v4l2_fract max_fps; + u32 hts_def; + u32 vts_def; + u32 exp_def; + u32 mipi_freq_idx; + u32 bpp; + const struct regval *reg_list; + u32 hdr_mode; + u32 vc[PAD_MAX]; +}; + +struct sc850sl { + struct i2c_client *client; + struct clk *xvclk; + struct gpio_desc *reset_gpio; + struct gpio_desc *power_gpio; + struct regulator_bulk_data supplies[SC850SL_NUM_SUPPLIES]; + + struct pinctrl *pinctrl; + struct pinctrl_state *pins_default; + struct pinctrl_state *pins_sleep; + + struct v4l2_subdev subdev; + struct media_pad pad; + struct v4l2_ctrl_handler ctrl_handler; + struct v4l2_ctrl *exposure; + struct v4l2_ctrl *anal_a_gain; + struct v4l2_ctrl *digi_gain; + struct v4l2_ctrl *hblank; + struct v4l2_ctrl *vblank; + struct v4l2_ctrl *test_pattern; + struct v4l2_ctrl *pixel_rate; + struct v4l2_ctrl *link_freq; + struct mutex mutex; + struct v4l2_fract cur_fps; + bool streaming; + bool power_on; + bool is_first_streamoff; + const struct sc850sl_mode *cur_mode; + u32 module_index; + u32 cfg_num; + const char *module_facing; + const char *module_name; + const char *len_name; + u32 cur_vts; + bool has_init_exp; + struct preisp_hdrae_exp_s init_hdrae_exp; +}; + + +#define to_sc850sl(sd) container_of(sd, struct sc850sl, subdev) + +//cleaned_0x20_SC850SL_MIPI_24Minput_1C4D_1080Mbps_10bit_3840x2160_30fps_one_expo.ini +static __maybe_unused const struct regval sc850sl_linear10bit_3840x2160_regs[] = { + {0x0103, 0x01}, + {0x0100, 0x00}, + {0x36e9, 0x80}, + {0x36f9, 0x80}, + {0x36ea, 0x09}, + {0x36eb, 0x0c}, + {0x36ec, 0x4a}, + {0x36ed, 0x24}, + {0x36fa, 0x0b}, + {0x36fb, 0x33}, + {0x36fc, 0x10}, + {0x36fd, 0x37}, + {0x36e9, 0x24}, + {0x36f9, 0x53}, + {0x3018, 0x7a}, + {0x3019, 0xf0}, + {0x301a, 0x30}, + {0x301e, 0x3c}, + {0x301f, 0x20}, + {0x302a, 0x00}, + {0x3031, 0x0a}, + {0x3032, 0x20}, + {0x3033, 0x22}, + {0x3037, 0x00}, + {0x303e, 0xb4}, + {0x320c, 0x04}, + {0x320d, 0x4c}, + {0x3226, 0x00}, + {0x3227, 0x03}, + 
{0x3250, 0x40}, + {0x3253, 0x08}, + {0x327e, 0x00}, + {0x3280, 0x00}, + {0x3281, 0x00}, + {0x3301, 0x3c}, + {0x3304, 0x30}, + {0x3306, 0xe8}, + {0x3308, 0x10}, + {0x3309, 0x70}, + {0x330a, 0x01}, + {0x330b, 0xe0}, + {0x330d, 0x10}, + {0x3314, 0x92}, + {0x331e, 0x29}, + {0x331f, 0x69}, + {0x3333, 0x10}, + {0x3347, 0x05}, + {0x3348, 0xd0}, + {0x3352, 0x01}, + {0x3356, 0x38}, + {0x335d, 0x60}, + {0x3362, 0x70}, + {0x338f, 0x80}, + {0x33af, 0x48}, + {0x33fe, 0x00}, + {0x3400, 0x12}, + {0x3406, 0x04}, + {0x3410, 0x12}, + {0x3416, 0x06}, + {0x3433, 0x01}, + {0x3440, 0x12}, + {0x3446, 0x08}, + {0x3478, 0x01}, + {0x3479, 0x01}, + {0x347a, 0x02}, + {0x347b, 0x01}, + {0x347c, 0x04}, + {0x347d, 0x01}, + {0x3616, 0x0c}, + {0x3620, 0x92}, + {0x3622, 0x74}, + {0x3629, 0x74}, + {0x362a, 0xf0}, + {0x362b, 0x0f}, + {0x362d, 0x00}, + {0x3630, 0x68}, + {0x3633, 0x22}, + {0x3634, 0x22}, + {0x3635, 0x20}, + {0x3637, 0x06}, + {0x3638, 0x26}, + {0x363b, 0x06}, + {0x363c, 0x08}, + {0x363d, 0x05}, + {0x363e, 0x8f}, + {0x3648, 0xe0}, + {0x3649, 0x0a}, + {0x364a, 0x06}, + {0x364c, 0x6a}, + {0x3650, 0x3d}, + {0x3654, 0x40}, + {0x3656, 0x68}, + {0x3657, 0x0f}, + {0x3658, 0x3d}, + {0x365c, 0x40}, + {0x365e, 0x68}, + {0x3901, 0x04}, + {0x3904, 0x20}, + {0x3905, 0x91}, + {0x391e, 0x83}, + {0x3928, 0x04}, + {0x3933, 0xa0}, + {0x3934, 0x0a}, + {0x3935, 0x68}, + {0x3936, 0x00}, + {0x3937, 0x20}, + {0x3938, 0x0a}, + {0x3946, 0x20}, + {0x3961, 0x40}, + {0x3962, 0x40}, + {0x3963, 0xc8}, + {0x3964, 0xc8}, + {0x3965, 0x40}, + {0x3966, 0x40}, + {0x3967, 0x00}, + {0x39cd, 0xc8}, + {0x39ce, 0xc8}, + {0x3e01, 0x82}, + {0x3e02, 0x00}, + {0x3e0e, 0x02}, + {0x3e0f, 0x00}, + {0x3e1c, 0x0f}, + {0x3e23, 0x00}, + {0x3e24, 0x00}, + {0x3e53, 0x00}, + {0x3e54, 0x00}, + {0x3e68, 0x00}, + {0x3e69, 0x80}, + {0x3e73, 0x00}, + {0x3e74, 0x00}, + {0x3e86, 0x03}, + {0x3e87, 0x40}, + {0x3f02, 0x24}, + {0x4424, 0x02}, + {0x4501, 0xc4}, + {0x4509, 0x20}, + {0x4561, 0x12}, + {0x4800, 0x24}, + {0x4837, 0x0f}, + {0x4900, 0x24}, + 
{0x4937, 0x0f}, + {0x5000, 0x0e}, + {0x500f, 0x35}, + {0x5020, 0x00}, + {0x5787, 0x10}, + {0x5788, 0x06}, + {0x5789, 0x00}, + {0x578a, 0x18}, + {0x578b, 0x0c}, + {0x578c, 0x00}, + {0x5790, 0x10}, + {0x5791, 0x06}, + {0x5792, 0x01}, + {0x5793, 0x18}, + {0x5794, 0x0c}, + {0x5795, 0x01}, + {0x5799, 0x06}, + {0x57a2, 0x60}, + {0x59e0, 0xfe}, + {0x59e1, 0x40}, + {0x59e2, 0x38}, + {0x59e3, 0x30}, + {0x59e4, 0x20}, + {0x59e5, 0x38}, + {0x59e6, 0x30}, + {0x59e7, 0x20}, + {0x59e8, 0x3f}, + {0x59e9, 0x38}, + {0x59ea, 0x30}, + {0x59eb, 0x3f}, + {0x59ec, 0x38}, + {0x59ed, 0x30}, + {0x59ee, 0xfe}, + {0x59ef, 0x40}, + {0x59f4, 0x38}, + {0x59f5, 0x30}, + {0x59f6, 0x20}, + {0x59f7, 0x38}, + {0x59f8, 0x30}, + {0x59f9, 0x20}, + {0x59fa, 0x3f}, + {0x59fb, 0x38}, + {0x59fc, 0x30}, + {0x59fd, 0x3f}, + {0x59fe, 0x38}, + {0x59ff, 0x30}, + {0x0100, 0x01}, + /* + * [gain < 2x] {0x363c, 0x05}, + * [gain >=2x] {0x363c, 0x07}, + */ + {0x363c, 0x07}, + {REG_NULL, 0x00}, +}; + +/* + * The width and height must be configured to be + * the same as the current output resolution of the sensor. + * The input width of the isp needs to be 16 aligned. + * The input height of the isp needs to be 8 aligned. + * If the width or height does not meet the alignment rules, + * you can configure the cropping parameters with the following function to + * crop out the appropriate resolution. 
+ * struct v4l2_subdev_pad_ops { + * .get_selection + * } + */ +static const struct sc850sl_mode supported_modes[] = { + { + .bus_fmt = MEDIA_BUS_FMT_SBGGR10_1X10, + .width = 3840, + .height = 2160, + .max_fps = { + .numerator = 10000, + .denominator = 300000, + }, + .exp_def = 0x08c0, + .hts_def = 0x0226*5-0x180, + .vts_def = 0x08ca, + .bus_fmt = MEDIA_BUS_FMT_SBGGR10_1X10, + .reg_list = sc850sl_linear10bit_3840x2160_regs, + .hdr_mode = NO_HDR, + .mipi_freq_idx = 0, + .bpp = 10, + .vc[PAD0] = V4L2_MBUS_CSI2_CHANNEL_0, + }, +}; + + + +static const char * const sc850sl_test_pattern_menu[] = { + "Disabled", + "Vertical Color Bar Type 1", + "Vertical Color Bar Type 2", + "Vertical Color Bar Type 3", + "Vertical Color Bar Type 4" +}; + +static const s64 link_freq_items[] = { + MIPI_FREQ_540M, +}; + +/* Write registers up to 4 at a time */ +static int sc850sl_write_reg(struct i2c_client *client, u16 reg, + u32 len, u32 val) +{ + u32 buf_i, val_i; + u8 buf[6]; + u8 *val_p; + __be32 val_be; + + if (len > 4) + return -EINVAL; + + buf[0] = reg >> 8; + buf[1] = reg & 0xff; + + val_be = cpu_to_be32(val); + val_p = (u8 *)&val_be; + buf_i = 2; + val_i = 4 - len; + + while (val_i < 4) + buf[buf_i++] = val_p[val_i++]; + + if (i2c_master_send(client, buf, len + 2) != len + 2) + return -EIO; + + return 0; +} + +static int sc850sl_write_array(struct i2c_client *client, + const struct regval *regs) +{ + u32 i; + int ret = 0; + + for (i = 0; ret == 0 && regs[i].addr != REG_NULL; i++) { + ret = sc850sl_write_reg(client, regs[i].addr, + SC850SL_REG_VALUE_08BIT, regs[i].val); + } + return ret; +} + +/* Read registers up to 4 at a time */ +static int sc850sl_read_reg(struct i2c_client *client, u16 reg, unsigned int len, + u32 *val) +{ + struct i2c_msg msgs[2]; + u8 *data_be_p; + __be32 data_be = 0; + __be16 reg_addr_be = cpu_to_be16(reg); + int ret; + + if (len > 4 || !len) + return -EINVAL; + + data_be_p = (u8 *)&data_be; + /* Write register address */ + msgs[0].addr = client->addr; + 
msgs[0].flags = 0; + msgs[0].len = 2; + msgs[0].buf = (u8 *)&reg_addr_be; /* big-endian 16-bit register address */ + + /* Read data from register */ + msgs[1].addr = client->addr; + msgs[1].flags = I2C_M_RD; + msgs[1].len = len; + msgs[1].buf = &data_be_p[4 - len]; + + ret = i2c_transfer(client->adapter, msgs, ARRAY_SIZE(msgs)); + if (ret != ARRAY_SIZE(msgs)) + return -EIO; + + *val = be32_to_cpu(data_be); + + return 0; +} + +static int sc850sl_get_reso_dist(const struct sc850sl_mode *mode, + struct v4l2_mbus_framefmt *framefmt) +{ + return abs(mode->width - framefmt->width) + + abs(mode->height - framefmt->height); +} + +static const struct sc850sl_mode * +sc850sl_find_best_fit(struct sc850sl *sc850sl, struct v4l2_subdev_format *fmt) +{ + struct v4l2_mbus_framefmt *framefmt = &fmt->format; + int dist; + int cur_best_fit = 0; + int cur_best_fit_dist = -1; + unsigned int i; + + for (i = 0; i < ARRAY_SIZE(supported_modes); i++) { + dist = sc850sl_get_reso_dist(&supported_modes[i], framefmt); + if ((cur_best_fit_dist == -1 || dist < cur_best_fit_dist) && + supported_modes[i].bus_fmt == framefmt->code) { + cur_best_fit_dist = dist; + cur_best_fit = i; + } + } + dev_info(&sc850sl->client->dev, "%s: cur_best_fit(%d)", + __func__, cur_best_fit); + + return &supported_modes[cur_best_fit]; +} + +static void sc850sl_change_mode(struct sc850sl *sc850sl, const struct sc850sl_mode *mode) +{ + sc850sl->cur_mode = mode; + sc850sl->cur_vts = sc850sl->cur_mode->vts_def; + dev_info(&sc850sl->client->dev, "set fmt: cur_mode: %dx%d, hdr: %d\n", + mode->width, mode->height, mode->hdr_mode); +} + +static int sc850sl_set_fmt(struct v4l2_subdev *sd, + struct v4l2_subdev_pad_config *cfg, + struct v4l2_subdev_format *fmt) +{ + struct sc850sl *sc850sl = to_sc850sl(sd); + const struct sc850sl_mode *mode; + s64 h_blank, vblank_def; + u64 pixel_rate = 0; + + mutex_lock(&sc850sl->mutex); + + mode = sc850sl_find_best_fit(sc850sl, fmt); + fmt->format.code = mode->bus_fmt; + fmt->format.width = mode->width; + fmt->format.height =
mode->height; + fmt->format.field = V4L2_FIELD_NONE; + if (fmt->which == V4L2_SUBDEV_FORMAT_TRY) { +#ifdef CONFIG_VIDEO_V4L2_SUBDEV_API + *v4l2_subdev_get_try_format(sd, cfg, fmt->pad) = fmt->format; +#else + mutex_unlock(&sc850sl->mutex); + return -ENOTTY; +#endif + } else { + sc850sl_change_mode(sc850sl, mode); + h_blank = mode->hts_def - mode->width; + __v4l2_ctrl_modify_range(sc850sl->hblank, h_blank, + h_blank, 1, h_blank); + vblank_def = mode->vts_def - mode->height; + __v4l2_ctrl_modify_range(sc850sl->vblank, vblank_def, + SC850SL_VTS_MAX - mode->height, + 1, vblank_def); + __v4l2_ctrl_s_ctrl(sc850sl->link_freq, mode->mipi_freq_idx); + pixel_rate = (u32)link_freq_items[mode->mipi_freq_idx] / + mode->bpp * 2 * SC850SL_4LANES; + __v4l2_ctrl_s_ctrl_int64(sc850sl->pixel_rate, pixel_rate); + sc850sl->cur_fps = mode->max_fps; + sc850sl->cur_vts = mode->vts_def; + } + + mutex_unlock(&sc850sl->mutex); + + return 0; +} + +static int sc850sl_get_fmt(struct v4l2_subdev *sd, + struct v4l2_subdev_pad_config *cfg, + struct v4l2_subdev_format *fmt) +{ + struct sc850sl *sc850sl = to_sc850sl(sd); + const struct sc850sl_mode *mode = sc850sl->cur_mode; + + mutex_lock(&sc850sl->mutex); + if (fmt->which == V4L2_SUBDEV_FORMAT_TRY) { +#ifdef CONFIG_VIDEO_V4L2_SUBDEV_API + fmt->format = *v4l2_subdev_get_try_format(sd, cfg, fmt->pad); +#else + mutex_unlock(&sc850sl->mutex); + return -ENOTTY; +#endif + } else { + fmt->format.width = mode->width; + fmt->format.height = mode->height; + fmt->format.code = mode->bus_fmt; + fmt->format.field = V4L2_FIELD_NONE; + if (fmt->pad < PAD_MAX && mode->hdr_mode != NO_HDR) + fmt->reserved[0] = mode->vc[fmt->pad]; + else + fmt->reserved[0] = mode->vc[PAD0]; + } + mutex_unlock(&sc850sl->mutex); + + return 0; +} + +static int sc850sl_enum_mbus_code(struct v4l2_subdev *sd, + struct v4l2_subdev_pad_config *cfg, + struct v4l2_subdev_mbus_code_enum *code) +{ + struct sc850sl *sc850sl = to_sc850sl(sd); + + if (code->index != 0) + return -EINVAL; + 
code->code = sc850sl->cur_mode->bus_fmt; + + return 0; +} + +static int sc850sl_enum_frame_sizes(struct v4l2_subdev *sd, + struct v4l2_subdev_pad_config *cfg, + struct v4l2_subdev_frame_size_enum *fse) +{ + struct sc850sl *sc850sl = to_sc850sl(sd); + + if (fse->index >= sc850sl->cfg_num) + return -EINVAL; + + if (fse->code != supported_modes[fse->index].bus_fmt) + return -EINVAL; + + fse->min_width = supported_modes[fse->index].width; + fse->max_width = supported_modes[fse->index].width; + fse->max_height = supported_modes[fse->index].height; + fse->min_height = supported_modes[fse->index].height; + + return 0; +} + +static int sc850sl_enable_test_pattern(struct sc850sl *sc850sl, u32 pattern) +{ + u32 val = 0; + int ret = 0; + + ret = sc850sl_read_reg(sc850sl->client, SC850SL_REG_TEST_PATTERN, + SC850SL_REG_VALUE_08BIT, &val); + if (pattern) + val |= SC850SL_TEST_PATTERN_ENABLE; + else + val &= ~SC850SL_TEST_PATTERN_ENABLE; + ret |= sc850sl_write_reg(sc850sl->client, SC850SL_REG_TEST_PATTERN, + SC850SL_REG_VALUE_08BIT, val); + return ret; +} + +static int sc850sl_g_frame_interval(struct v4l2_subdev *sd, + struct v4l2_subdev_frame_interval *fi) +{ + struct sc850sl *sc850sl = to_sc850sl(sd); + const struct sc850sl_mode *mode = sc850sl->cur_mode; + + if (sc850sl->streaming) + fi->interval = sc850sl->cur_fps; + else + fi->interval = mode->max_fps; + + return 0; +} + +static int sc850sl_g_mbus_config(struct v4l2_subdev *sd, unsigned int pad_id, + struct v4l2_mbus_config *config) +{ + struct sc850sl *sc850sl = to_sc850sl(sd); + const struct sc850sl_mode *mode = sc850sl->cur_mode; + u32 val = 0; + + if (mode->hdr_mode == NO_HDR) + val = 1 << (SC850SL_4LANES - 1) | + V4L2_MBUS_CSI2_CHANNEL_0 | + V4L2_MBUS_CSI2_CONTINUOUS_CLOCK; + if (mode->hdr_mode == HDR_X2) + val = 1 << (SC850SL_4LANES - 1) | + V4L2_MBUS_CSI2_CHANNEL_0 | + V4L2_MBUS_CSI2_CONTINUOUS_CLOCK | + V4L2_MBUS_CSI2_CHANNEL_1; + + config->type = V4L2_MBUS_CSI2_DPHY; + config->flags = val; + + return 0; +} + 
+static void sc850sl_get_module_inf(struct sc850sl *sc850sl, + struct rkmodule_inf *inf) +{ + memset(inf, 0, sizeof(*inf)); + strscpy(inf->base.sensor, SC850SL_NAME, sizeof(inf->base.sensor)); + strscpy(inf->base.module, sc850sl->module_name, + sizeof(inf->base.module)); + strscpy(inf->base.lens, sc850sl->len_name, sizeof(inf->base.lens)); +} + +static void sc850sl_get_gain_reg(u32 val, u32 *again_reg, u32 *again_fine_reg, + u32 *dgain_reg) +{ + u8 u8Reg0x3e09 = 0x40, u8Reg0x3e08 = 0x03; + u32 aCoarseGain = 0; + u32 aFineGain = 0; + u32 again = 0; + u32 dgain = 0; + + if (val < 64) + val = 64; + else if (val > SC850SL_GAIN_MAX) + val = SC850SL_GAIN_MAX; + + if (val <= 3199) { + again = val; + dgain = 1; + } else { + again = 3199; + dgain = val / again; + } + + //again + if (again <= 200) { + //a_gain < 3.125x + for (aCoarseGain = 1; aCoarseGain <= 2; aCoarseGain = aCoarseGain * 2) { + //1,2,4,8,16 + if (again < (64 * 2 * aCoarseGain)) + break; + } + aFineGain = again / aCoarseGain; + } else { + for (aCoarseGain = 1; aCoarseGain <= 8; aCoarseGain = aCoarseGain * 2) { + //1,2,4,8 + if (again < (64 * 2 * aCoarseGain * 3125 / 1000)) + break; + } + aFineGain = 1000 * again / aCoarseGain / 3125; + } + for ( ; aCoarseGain >= 2; aCoarseGain = aCoarseGain / 2) + u8Reg0x3e08 = (u8Reg0x3e08 << 1) | 0x01; + + u8Reg0x3e09 = aFineGain; + //dcg = 2.72  -->  2.72*1024=2785.28 + u8Reg0x3e08 = (again > 200) ? 
(u8Reg0x3e08 | 0x20) : (u8Reg0x3e08 & 0x1f); + + //dgain + if (dgain <= 1) { /*1x ~ 2x*/ + *dgain_reg = 0x00; + } else if (dgain <= 2) { /*2x ~ 4x*/ + *dgain_reg = 0x01; + } else if (dgain <= 4) { /*4x ~ 8x*/ + *dgain_reg = 0x03; + } else { + *dgain_reg = 0x07; + } + + *again_reg = u8Reg0x3e08; + *again_fine_reg = u8Reg0x3e09; +} + +static int sc850sl_get_channel_info(struct sc850sl *sc850sl, struct rkmodule_channel_info *ch_info) +{ + if (ch_info->index < PAD0 || ch_info->index >= PAD_MAX) + return -EINVAL; + ch_info->vc = sc850sl->cur_mode->vc[ch_info->index]; + ch_info->width = sc850sl->cur_mode->width; + ch_info->height = sc850sl->cur_mode->height; + ch_info->bus_fmt = sc850sl->cur_mode->bus_fmt; + return 0; +} + +static long sc850sl_ioctl(struct v4l2_subdev *sd, unsigned int cmd, void *arg) +{ + struct sc850sl *sc850sl = to_sc850sl(sd); + struct rkmodule_hdr_cfg *hdr_cfg; + const struct sc850sl_mode *mode; + struct rkmodule_channel_info *ch_info; + long ret = 0; + u64 pixel_rate = 0; + u32 i, h, w, stream; + + switch (cmd) { + case PREISP_CMD_SET_HDRAE_EXP: + /* + * ret = sc850sl_set_hdrae(sc850sl, arg); + */ + break; + + case RKMODULE_SET_HDR_CFG: + hdr_cfg = (struct rkmodule_hdr_cfg *)arg; + if (sc850sl->streaming) { + ret = sc850sl_write_array(sc850sl->client, sc850sl->cur_mode->reg_list); + if (ret) + return ret; + } + w = sc850sl->cur_mode->width; + h = sc850sl->cur_mode->height; + for (i = 0; i < sc850sl->cfg_num; i++) { + if (w == supported_modes[i].width && + h == supported_modes[i].height && + supported_modes[i].hdr_mode == hdr_cfg->hdr_mode) { + sc850sl_change_mode(sc850sl, &supported_modes[i]); + break; + } + } + if (i == sc850sl->cfg_num) { + dev_err(&sc850sl->client->dev, + "not find hdr mode:%d %dx%d config\n", + hdr_cfg->hdr_mode, w, h); + ret = -EINVAL; + } else { + mode = sc850sl->cur_mode; + w = mode->hts_def - mode->width; + h = mode->vts_def - mode->height; + __v4l2_ctrl_modify_range(sc850sl->hblank, w, w, 1, w); + 
__v4l2_ctrl_modify_range(sc850sl->vblank, h, + SC850SL_VTS_MAX - mode->height, + 1, h); + __v4l2_ctrl_s_ctrl(sc850sl->link_freq, mode->mipi_freq_idx); + pixel_rate = (u32)link_freq_items[mode->mipi_freq_idx] / + mode->bpp * 2 * SC850SL_4LANES; + __v4l2_ctrl_s_ctrl_int64(sc850sl->pixel_rate, + pixel_rate); + sc850sl->cur_fps = mode->max_fps; + sc850sl->cur_vts = mode->vts_def; + dev_info(&sc850sl->client->dev, + "sensor mode: %d\n", mode->hdr_mode); + } + break; + case RKMODULE_GET_MODULE_INFO: + sc850sl_get_module_inf(sc850sl, (struct rkmodule_inf *)arg); + break; + + case RKMODULE_GET_HDR_CFG: + hdr_cfg = (struct rkmodule_hdr_cfg *)arg; + hdr_cfg->esp.mode = HDR_NORMAL_VC; + hdr_cfg->hdr_mode = sc850sl->cur_mode->hdr_mode; + break; + + case RKMODULE_SET_QUICK_STREAM: + stream = *((u32 *)arg); + + if (stream) + ret = sc850sl_write_reg(sc850sl->client, SC850SL_REG_CTRL_MODE, + SC850SL_REG_VALUE_08BIT, SC850SL_MODE_STREAMING); + else + ret = sc850sl_write_reg(sc850sl->client, SC850SL_REG_CTRL_MODE, + SC850SL_REG_VALUE_08BIT, SC850SL_MODE_SW_STANDBY); + break; + + case RKMODULE_GET_CHANNEL_INFO: + ch_info = (struct rkmodule_channel_info *)arg; + ret = sc850sl_get_channel_info(sc850sl, ch_info); + break; + + default: + ret = -ENOIOCTLCMD; + break; + } + + return ret; +} + +#ifdef CONFIG_COMPAT +static long sc850sl_compat_ioctl32(struct v4l2_subdev *sd, + unsigned int cmd, unsigned long arg) +{ + void __user *up = compat_ptr(arg); + struct rkmodule_inf *inf; + struct rkmodule_awb_cfg *cfg; + struct rkmodule_hdr_cfg *hdr; + struct preisp_hdrae_exp_s *hdrae; + struct rkmodule_channel_info *ch_info; + long ret; + u32 stream; + u32 brl = 0; + struct rkmodule_csi_dphy_param *dphy_param; + + switch (cmd) { + case RKMODULE_GET_MODULE_INFO: + inf = kzalloc(sizeof(*inf), GFP_KERNEL); + if (!inf) { + ret = -ENOMEM; + return ret; + } + + ret = sc850sl_ioctl(sd, cmd, inf); + if (!ret) { + if (copy_to_user(up, inf, sizeof(*inf))) { + kfree(inf); + return -EFAULT; + } + } + 
kfree(inf); + break; + case RKMODULE_AWB_CFG: + cfg = kzalloc(sizeof(*cfg), GFP_KERNEL); + if (!cfg) { + ret = -ENOMEM; + return ret; + } + + if (copy_from_user(cfg, up, sizeof(*cfg))) { + kfree(cfg); + return -EFAULT; + } + ret = sc850sl_ioctl(sd, cmd, cfg); + kfree(cfg); + break; + case RKMODULE_GET_HDR_CFG: + hdr = kzalloc(sizeof(*hdr), GFP_KERNEL); + if (!hdr) { + ret = -ENOMEM; + return ret; + } + + ret = sc850sl_ioctl(sd, cmd, hdr); + if (!ret) { + if (copy_to_user(up, hdr, sizeof(*hdr))) { + kfree(hdr); + return -EFAULT; + } + } + kfree(hdr); + break; + case RKMODULE_SET_HDR_CFG: + hdr = kzalloc(sizeof(*hdr), GFP_KERNEL); + if (!hdr) { + ret = -ENOMEM; + return ret; + } + + if (copy_from_user(hdr, up, sizeof(*hdr))) { + kfree(hdr); + return -EFAULT; + } + ret = sc850sl_ioctl(sd, cmd, hdr); + kfree(hdr); + break; + case PREISP_CMD_SET_HDRAE_EXP: + hdrae = kzalloc(sizeof(*hdrae), GFP_KERNEL); + if (!hdrae) { + ret = -ENOMEM; + return ret; + } + + if (copy_from_user(hdrae, up, sizeof(*hdrae))) { + kfree(hdrae); + return -EFAULT; + } + ret = sc850sl_ioctl(sd, cmd, hdrae); + kfree(hdrae); + break; + case RKMODULE_SET_QUICK_STREAM: + if (copy_from_user(&stream, up, sizeof(u32))) + return -EFAULT; + ret = sc850sl_ioctl(sd, cmd, &stream); + break; + case RKMODULE_GET_SONY_BRL: + ret = sc850sl_ioctl(sd, cmd, &brl); + if (!ret) { + if (copy_to_user(up, &brl, sizeof(u32))) + return -EFAULT; + } + break; + case RKMODULE_GET_CHANNEL_INFO: + ch_info = kzalloc(sizeof(*ch_info), GFP_KERNEL); + if (!ch_info) { + ret = -ENOMEM; + return ret; + } + + ret = sc850sl_ioctl(sd, cmd, ch_info); + if (!ret) { + ret = copy_to_user(up, ch_info, sizeof(*ch_info)); + if (ret) + ret = -EFAULT; + } + kfree(ch_info); + break; + case RKMODULE_GET_CSI_DPHY_PARAM: + dphy_param = kzalloc(sizeof(*dphy_param), GFP_KERNEL); + if (!dphy_param) { + ret = -ENOMEM; + return ret; + } + + ret = sc850sl_ioctl(sd, cmd, dphy_param); + if (!ret) { + ret = copy_to_user(up, dphy_param, sizeof(*dphy_param)); + 
if (ret) + ret = -EFAULT; + } + kfree(dphy_param); + break; + + default: + ret = -ENOIOCTLCMD; + break; + } + + return ret; +} +#endif + + +static int __sc850sl_start_stream(struct sc850sl *sc850sl) +{ + int ret; + + ret = sc850sl_write_array(sc850sl->client, sc850sl->cur_mode->reg_list); + if (ret) + return ret; + + ret = __v4l2_ctrl_handler_setup(&sc850sl->ctrl_handler); + if (ret) + return ret; + /* In case these controls are set before streaming */ + if (sc850sl->has_init_exp && sc850sl->cur_mode->hdr_mode != NO_HDR) { + ret = sc850sl_ioctl(&sc850sl->subdev, PREISP_CMD_SET_HDRAE_EXP, + &sc850sl->init_hdrae_exp); + if (ret) { + dev_err(&sc850sl->client->dev, + "init exp fail in hdr mode\n"); + return ret; + } + } + return sc850sl_write_reg(sc850sl->client, SC850SL_REG_CTRL_MODE, + SC850SL_REG_VALUE_08BIT, SC850SL_MODE_STREAMING); +} + +static int __sc850sl_stop_stream(struct sc850sl *sc850sl) +{ + sc850sl->has_init_exp = false; + return sc850sl_write_reg(sc850sl->client, SC850SL_REG_CTRL_MODE, + SC850SL_REG_VALUE_08BIT, SC850SL_MODE_SW_STANDBY); +} + +static int sc850sl_s_stream(struct v4l2_subdev *sd, int on) +{ + struct sc850sl *sc850sl = to_sc850sl(sd); + struct i2c_client *client = sc850sl->client; + int ret = 0; + + dev_info(&sc850sl->client->dev, "s_stream: %d. 
%dx%d, hdr: %d, bpp: %d\n", + on, sc850sl->cur_mode->width, sc850sl->cur_mode->height, + sc850sl->cur_mode->hdr_mode, sc850sl->cur_mode->bpp); + + mutex_lock(&sc850sl->mutex); + on = !!on; + if (on == sc850sl->streaming) + goto unlock_and_return; + + if (on) { + ret = pm_runtime_get_sync(&client->dev); + if (ret < 0) { + pm_runtime_put_noidle(&client->dev); + goto unlock_and_return; + } + ret = __sc850sl_start_stream(sc850sl); + if (ret) { + v4l2_err(sd, "start stream failed while write regs\n"); + pm_runtime_put(&client->dev); + goto unlock_and_return; + } + } else { + __sc850sl_stop_stream(sc850sl); + pm_runtime_put(&client->dev); + } + + sc850sl->streaming = on; + +unlock_and_return: + mutex_unlock(&sc850sl->mutex); + return ret; +} + +static int sc850sl_s_power(struct v4l2_subdev *sd, int on) +{ + struct sc850sl *sc850sl = to_sc850sl(sd); + struct i2c_client *client = sc850sl->client; + int ret = 0; + + mutex_lock(&sc850sl->mutex); + + /* If the power state is not modified - no work to do. 
*/ + if (sc850sl->power_on == !!on) + goto unlock_and_return; + + if (on) { + ret = pm_runtime_get_sync(&client->dev); + if (ret < 0) { + pm_runtime_put_noidle(&client->dev); + goto unlock_and_return; + } + + ret |= sc850sl_write_reg(sc850sl->client, + SC850SL_SOFTWARE_RESET_REG, + SC850SL_REG_VALUE_08BIT, + 0x01); + /* + * usleep_range(100, 200); + * ret |= sc850sl_write_reg(sc2310->client, + * 0x303f, + * SC850SL_REG_VALUE_08BIT, + * 0x01); + */ + sc850sl->power_on = true; + } else { + pm_runtime_put(&client->dev); + sc850sl->power_on = false; + } + +unlock_and_return: + mutex_unlock(&sc850sl->mutex); + + return ret; +} + +static int __sc850sl_power_on(struct sc850sl *sc850sl) +{ + int ret; + struct device *dev = &sc850sl->client->dev; + + if (!IS_ERR_OR_NULL(sc850sl->pins_default)) { + ret = pinctrl_select_state(sc850sl->pinctrl, sc850sl->pins_default); + if (ret < 0) + dev_err(dev, "could not set pins\n"); + } + + if (!IS_ERR(sc850sl->power_gpio)) + gpiod_direction_output(sc850sl->power_gpio, 1); + + usleep_range(4000, 6000); + if (!IS_ERR(sc850sl->reset_gpio)) + gpiod_direction_output(sc850sl->reset_gpio, 0); + + usleep_range(4000, 6000); + ret = clk_set_rate(sc850sl->xvclk, SC850SL_XVCLK_FREQ_24M); + if (ret < 0) + dev_warn(dev, "Failed to set xvclk rate 24MHz\n"); + if (clk_get_rate(sc850sl->xvclk) != SC850SL_XVCLK_FREQ_24M) + dev_warn(dev, "xvclk mismatched\n"); + ret = clk_prepare_enable(sc850sl->xvclk); + if (ret < 0) { + dev_err(dev, "Failed to enable xvclk\n"); + goto err_clk; + } + + ret = regulator_bulk_enable(SC850SL_NUM_SUPPLIES, sc850sl->supplies); + if (ret < 0) { + dev_err(dev, "Failed to enable regulators\n"); + goto disable_clk; + } + + usleep_range(4000, 6000); + return 0; +disable_clk: /* regulators failed after xvclk was enabled: balance clk_prepare_enable() */ + clk_disable_unprepare(sc850sl->xvclk); +err_clk: /* xvclk is not enabled at this point: only put reset_gpio back to 1 */ + if (!IS_ERR(sc850sl->reset_gpio)) + gpiod_direction_output(sc850sl->reset_gpio, 1); + + return ret; +} + +static void __sc850sl_power_off(struct sc850sl *sc850sl) +{ + int ret; + struct device *dev =
&sc850sl->client->dev; + + if (!IS_ERR(sc850sl->reset_gpio)) + gpiod_direction_output(sc850sl->reset_gpio, 1); + clk_disable_unprepare(sc850sl->xvclk); + if (!IS_ERR_OR_NULL(sc850sl->pins_sleep)) { + ret = pinctrl_select_state(sc850sl->pinctrl, + sc850sl->pins_sleep); + if (ret < 0) + dev_dbg(dev, "could not set pins\n"); + } + if (!IS_ERR(sc850sl->power_gpio)) + gpiod_direction_output(sc850sl->power_gpio, 0); + regulator_bulk_disable(SC850SL_NUM_SUPPLIES, sc850sl->supplies); +} + +static int sc850sl_runtime_resume(struct device *dev) +{ + struct i2c_client *client = to_i2c_client(dev); + struct v4l2_subdev *sd = i2c_get_clientdata(client); + struct sc850sl *sc850sl = to_sc850sl(sd); + + return __sc850sl_power_on(sc850sl); +} + +static int sc850sl_runtime_suspend(struct device *dev) +{ + struct i2c_client *client = to_i2c_client(dev); + struct v4l2_subdev *sd = i2c_get_clientdata(client); + struct sc850sl *sc850sl = to_sc850sl(sd); + + __sc850sl_power_off(sc850sl); + + return 0; +} + +#ifdef CONFIG_VIDEO_V4L2_SUBDEV_API +static int sc850sl_open(struct v4l2_subdev *sd, struct v4l2_subdev_fh *fh) +{ + struct sc850sl *sc850sl = to_sc850sl(sd); + struct v4l2_mbus_framefmt *try_fmt = + v4l2_subdev_get_try_format(sd, fh->pad, 0); + const struct sc850sl_mode *def_mode = &supported_modes[0]; + + mutex_lock(&sc850sl->mutex); + /* Initialize try_fmt */ + try_fmt->width = def_mode->width; + try_fmt->height = def_mode->height; + try_fmt->code = def_mode->bus_fmt; + try_fmt->field = V4L2_FIELD_NONE; + + mutex_unlock(&sc850sl->mutex); + /* No crop or compose */ + + return 0; +} +#endif + +static int sc850sl_enum_frame_interval(struct v4l2_subdev *sd, + struct v4l2_subdev_pad_config *cfg, + struct v4l2_subdev_frame_interval_enum *fie) +{ + struct sc850sl *sc850sl = to_sc850sl(sd); + + if (fie->index >= sc850sl->cfg_num) + return -EINVAL; + + fie->code = supported_modes[fie->index].bus_fmt; + fie->width = supported_modes[fie->index].width; + fie->height = 
supported_modes[fie->index].height; + fie->interval = supported_modes[fie->index].max_fps; + fie->reserved[0] = supported_modes[fie->index].hdr_mode; + return 0; +} + +#define CROP_START(SRC, DST) (((SRC) - (DST)) / 2 / 4 * 4) +#define DST_WIDTH_3840 3840 +#define DST_HEIGHT_2160 2160 +#define DST_WIDTH_1920 1920 +#define DST_HEIGHT_1080 1080 + +static int sc850sl_get_selection(struct v4l2_subdev *sd, + struct v4l2_subdev_pad_config *cfg, + struct v4l2_subdev_selection *sel) +{ + struct sc850sl *sc850sl = to_sc850sl(sd); + + if (sel->target == V4L2_SEL_TGT_CROP_BOUNDS) { + if (sc850sl->cur_mode->width == 3856) { + sel->r.left = CROP_START(sc850sl->cur_mode->width, DST_WIDTH_3840); + sel->r.width = DST_WIDTH_3840; + sel->r.top = CROP_START(sc850sl->cur_mode->height, DST_HEIGHT_2160); + sel->r.height = DST_HEIGHT_2160; + } else if (sc850sl->cur_mode->width == 1944) { + sel->r.left = CROP_START(sc850sl->cur_mode->width, DST_WIDTH_1920); + sel->r.width = DST_WIDTH_1920; + sel->r.top = CROP_START(sc850sl->cur_mode->height, DST_HEIGHT_1080); + sel->r.height = DST_HEIGHT_1080; + } else { + sel->r.left = CROP_START(sc850sl->cur_mode->width, + sc850sl->cur_mode->width); + sel->r.width = sc850sl->cur_mode->width; + sel->r.top = CROP_START(sc850sl->cur_mode->height, + sc850sl->cur_mode->height); + sel->r.height = sc850sl->cur_mode->height; + } + return 0; + } + return -EINVAL; +} + +static const struct dev_pm_ops sc850sl_pm_ops = { + SET_RUNTIME_PM_OPS(sc850sl_runtime_suspend, + sc850sl_runtime_resume, NULL) +}; + +#ifdef CONFIG_VIDEO_V4L2_SUBDEV_API +static const struct v4l2_subdev_internal_ops sc850sl_internal_ops = { + .open = sc850sl_open, +}; +#endif + +static const struct v4l2_subdev_core_ops sc850sl_core_ops = { + .s_power = sc850sl_s_power, + .ioctl = sc850sl_ioctl, +#ifdef CONFIG_COMPAT + .compat_ioctl32 = sc850sl_compat_ioctl32, +#endif +}; + +static const struct v4l2_subdev_video_ops sc850sl_video_ops = { + .s_stream = sc850sl_s_stream, + .g_frame_interval = 
sc850sl_g_frame_interval, +}; + +static const struct v4l2_subdev_pad_ops sc850sl_pad_ops = { + .enum_mbus_code = sc850sl_enum_mbus_code, + .enum_frame_size = sc850sl_enum_frame_sizes, + .enum_frame_interval = sc850sl_enum_frame_interval, + .get_fmt = sc850sl_get_fmt, + .set_fmt = sc850sl_set_fmt, + .get_selection = sc850sl_get_selection, + .get_mbus_config = sc850sl_g_mbus_config, +}; + +static const struct v4l2_subdev_ops sc850sl_subdev_ops = { + .core = &sc850sl_core_ops, + .video = &sc850sl_video_ops, + .pad = &sc850sl_pad_ops, +}; + +static void sc850sl_modify_fps_info(struct sc850sl *sc850sl) +{ + const struct sc850sl_mode *mode = sc850sl->cur_mode; + + sc850sl->cur_fps.denominator = mode->max_fps.denominator * mode->vts_def / + sc850sl->cur_vts; /* frame rate is inversely proportional to VTS */ +} + +static int sc850sl_set_ctrl(struct v4l2_ctrl *ctrl) +{ + struct sc850sl *sc850sl = container_of(ctrl->handler, + struct sc850sl, ctrl_handler); + struct i2c_client *client = sc850sl->client; + s64 max; + u32 again, again_fine, dgain; + int ret = 0; + u32 val; + + /* Propagate change of current control to all related controls */ + switch (ctrl->id) { + case V4L2_CID_VBLANK: + /* Update max exposure while meeting expected vblanking */ + max = sc850sl->cur_mode->height + ctrl->val - 8; + __v4l2_ctrl_modify_range(sc850sl->exposure, + sc850sl->exposure->minimum, max, + sc850sl->exposure->step, + sc850sl->exposure->default_value); + break; + } + + if (!pm_runtime_get_if_in_use(&client->dev)) + return 0; + + switch (ctrl->id) { + case V4L2_CID_EXPOSURE: + if (sc850sl->cur_mode->hdr_mode != NO_HDR) + goto out_ctrl; + ret = sc850sl_write_reg(sc850sl->client, + SC850SL_REG_EXP_LONG_H, + SC850SL_REG_VALUE_08BIT, + SC850SL_FETCH_EXP_H(ctrl->val)); + ret |= sc850sl_write_reg(sc850sl->client, + SC850SL_REG_EXP_LONG_M, + SC850SL_REG_VALUE_08BIT, + SC850SL_FETCH_EXP_M(ctrl->val)); + ret |= sc850sl_write_reg(sc850sl->client, + SC850SL_REG_EXP_LONG_L, + SC850SL_REG_VALUE_08BIT, + SC850SL_FETCH_EXP_L(ctrl->val)); + +
dev_dbg(&client->dev, "set exposure 0x%x\n", + ctrl->val); + break; + case V4L2_CID_ANALOGUE_GAIN: + if (sc850sl->cur_mode->hdr_mode != NO_HDR) + goto out_ctrl; + sc850sl_get_gain_reg(ctrl->val, &again, &again_fine, &dgain); + dev_dbg(&client->dev, "recv_gain:%d set again 0x%x, again_fine 0x%x, set dgain 0x%x\n", + ctrl->val, again, again_fine, dgain); + + ret |= sc850sl_write_reg(sc850sl->client, + SC850SL_REG_AGAIN, + SC850SL_REG_VALUE_08BIT, + again); + ret |= sc850sl_write_reg(sc850sl->client, + SC850SL_REG_AGAIN_FINE, + SC850SL_REG_VALUE_08BIT, + again_fine); + ret |= sc850sl_write_reg(sc850sl->client, + SC850SL_REG_DGAIN, + SC850SL_REG_VALUE_08BIT, + dgain); + break; + case V4L2_CID_VBLANK: + ret = sc850sl_write_reg(sc850sl->client, SC850SL_REG_VTS, + SC850SL_REG_VALUE_16BIT, + ctrl->val + sc850sl->cur_mode->height); + if (!ret) + sc850sl->cur_vts = ctrl->val + sc850sl->cur_mode->height; + if (sc850sl->cur_vts != sc850sl->cur_mode->vts_def) + sc850sl_modify_fps_info(sc850sl); + dev_dbg(&client->dev, "set vblank 0x%x\n", + ctrl->val); + break; + case V4L2_CID_TEST_PATTERN: + ret = sc850sl_enable_test_pattern(sc850sl, ctrl->val); + break; + case V4L2_CID_HFLIP: + ret = sc850sl_read_reg(sc850sl->client, SC850SL_FLIP_REG, + SC850SL_REG_VALUE_08BIT, &val); + if (ret) + break; + if (ctrl->val) + val |= SC850SL_MIRROR_MASK; + else + val &= ~SC850SL_MIRROR_MASK; + ret |= sc850sl_write_reg(sc850sl->client, SC850SL_FLIP_REG, + SC850SL_REG_VALUE_08BIT, val); + break; + case V4L2_CID_VFLIP: + ret = sc850sl_read_reg(sc850sl->client, SC850SL_FLIP_REG, + SC850SL_REG_VALUE_08BIT, &val); + if (ret) + break; + if (ctrl->val) + val |= SC850SL_FLIP_MASK; + else + val &= ~SC850SL_FLIP_MASK; + ret |= sc850sl_write_reg(sc850sl->client, SC850SL_FLIP_REG, + SC850SL_REG_VALUE_08BIT, val); + break; + default: + dev_warn(&client->dev, "%s Unhandled id:0x%x, val:0x%x\n", + __func__, ctrl->id, ctrl->val); + break; + } + +out_ctrl: + pm_runtime_put(&client->dev); + + return ret; +} + 
+static const struct v4l2_ctrl_ops sc850sl_ctrl_ops = { + .s_ctrl = sc850sl_set_ctrl, +}; + +static int sc850sl_initialize_controls(struct sc850sl *sc850sl) +{ + const struct sc850sl_mode *mode; + struct v4l2_ctrl_handler *handler; + s64 exposure_max, vblank_def; + u64 pixel_rate = 0; + u32 h_blank; + int ret; + + handler = &sc850sl->ctrl_handler; + mode = sc850sl->cur_mode; + ret = v4l2_ctrl_handler_init(handler, 9); + if (ret) + return ret; + handler->lock = &sc850sl->mutex; + + sc850sl->link_freq = v4l2_ctrl_new_int_menu(handler, NULL, + V4L2_CID_LINK_FREQ, 0, 0, link_freq_items); + v4l2_ctrl_s_ctrl(sc850sl->link_freq, mode->mipi_freq_idx); + + /* pixel rate = link frequency * 2 * lanes / BITS_PER_SAMPLE */ + pixel_rate = (u32)link_freq_items[mode->mipi_freq_idx] / mode->bpp * 2 * SC850SL_4LANES; + sc850sl->pixel_rate = v4l2_ctrl_new_std(handler, NULL, + V4L2_CID_PIXEL_RATE, 0, SC850SL_MAX_PIXEL_RATE, + 1, pixel_rate); + + h_blank = mode->hts_def - mode->width; + sc850sl->hblank = v4l2_ctrl_new_std(handler, NULL, V4L2_CID_HBLANK, + h_blank, h_blank, 1, h_blank); + if (sc850sl->hblank) + sc850sl->hblank->flags |= V4L2_CTRL_FLAG_READ_ONLY; + + vblank_def = mode->vts_def - mode->height; + sc850sl->vblank = v4l2_ctrl_new_std(handler, &sc850sl_ctrl_ops, + V4L2_CID_VBLANK, vblank_def, + SC850SL_VTS_MAX - mode->height, + 1, vblank_def); + + exposure_max = mode->vts_def - 4; /*vts_def 0x08ca=2250*/ + sc850sl->exposure = v4l2_ctrl_new_std(handler, &sc850sl_ctrl_ops, + V4L2_CID_EXPOSURE, SC850SL_EXPOSURE_MIN, + exposure_max, SC850SL_EXPOSURE_STEP, + mode->exp_def); /*exp_def 0x08c0=2240*/ + + sc850sl->anal_a_gain = v4l2_ctrl_new_std(handler, &sc850sl_ctrl_ops, + V4L2_CID_ANALOGUE_GAIN, SC850SL_GAIN_MIN, + SC850SL_GAIN_MAX, SC850SL_GAIN_STEP, + SC850SL_GAIN_DEFAULT); + + sc850sl->test_pattern = v4l2_ctrl_new_std_menu_items(handler, + &sc850sl_ctrl_ops, V4L2_CID_TEST_PATTERN, + ARRAY_SIZE(sc850sl_test_pattern_menu) - 1, + 0, 0, sc850sl_test_pattern_menu); + + 
v4l2_ctrl_new_std(handler, &sc850sl_ctrl_ops, V4L2_CID_HFLIP, 0, 1, 1, 0); + v4l2_ctrl_new_std(handler, &sc850sl_ctrl_ops, V4L2_CID_VFLIP, 0, 1, 1, 0); + + if (handler->error) { + ret = handler->error; + dev_err(&sc850sl->client->dev, + "Failed to init controls(%d)\n", ret); + goto err_free_handler; + } + + sc850sl->subdev.ctrl_handler = handler; + sc850sl->has_init_exp = false; + sc850sl->cur_fps = mode->max_fps; + sc850sl->cur_vts = mode->vts_def; + + return 0; + +err_free_handler: + v4l2_ctrl_handler_free(handler); + + return ret; +} + +static int sc850sl_check_sensor_id(struct sc850sl *sc850sl, + struct i2c_client *client) +{ + struct device *dev = &sc850sl->client->dev; + u32 id = 0; + int ret; + + ret = sc850sl_read_reg(client, SC850SL_REG_CHIP_ID, + SC850SL_REG_VALUE_16BIT, &id); + if (id != CHIP_ID) { + dev_err(dev, "Unexpected sensor id(%06x), ret(%d)\n", id, ret); + return -ENODEV; + } + + dev_info(dev, "Detected sc850sl id %06x\n", CHIP_ID); + + return 0; +} + +static int sc850sl_configure_regulators(struct sc850sl *sc850sl) +{ + unsigned int i; + + for (i = 0; i < SC850SL_NUM_SUPPLIES; i++) + sc850sl->supplies[i].supply = sc850sl_supply_names[i]; + + return devm_regulator_bulk_get(&sc850sl->client->dev, + SC850SL_NUM_SUPPLIES, + sc850sl->supplies); +} + +static int sc850sl_probe(struct i2c_client *client, + const struct i2c_device_id *id) +{ + struct device *dev = &client->dev; + struct device_node *node = dev->of_node; + struct sc850sl *sc850sl; + struct v4l2_subdev *sd; + char facing[2]; + int ret; + u32 i, hdr_mode = 0; + + dev_info(dev, "driver version: %02x.%02x.%02x", + DRIVER_VERSION >> 16, + (DRIVER_VERSION & 0xff00) >> 8, + DRIVER_VERSION & 0x00ff); + + sc850sl = devm_kzalloc(dev, sizeof(*sc850sl), GFP_KERNEL); + if (!sc850sl) + return -ENOMEM; + + ret = of_property_read_u32(node, RKMODULE_CAMERA_MODULE_INDEX, + &sc850sl->module_index); + ret |= of_property_read_string(node, RKMODULE_CAMERA_MODULE_FACING, + &sc850sl->module_facing); + ret |= 
of_property_read_string(node, RKMODULE_CAMERA_MODULE_NAME, + &sc850sl->module_name); + ret |= of_property_read_string(node, RKMODULE_CAMERA_LENS_NAME, + &sc850sl->len_name); + if (ret) { + dev_err(dev, "could not get module information!\n"); + return -EINVAL; + } + + ret = of_property_read_u32(node, OF_CAMERA_HDR_MODE, &hdr_mode); + if (ret) { + hdr_mode = NO_HDR; + dev_warn(dev, " Get hdr mode failed! no hdr default\n"); + } + + sc850sl->client = client; + sc850sl->cfg_num = ARRAY_SIZE(supported_modes); + for (i = 0; i < sc850sl->cfg_num; i++) { + if (hdr_mode == supported_modes[i].hdr_mode) { + sc850sl->cur_mode = &supported_modes[i]; + break; + } + } + + sc850sl->xvclk = devm_clk_get(dev, "xvclk"); + if (IS_ERR(sc850sl->xvclk)) { + dev_err(dev, "Failed to get xvclk\n"); + return -EINVAL; + } + + sc850sl->reset_gpio = devm_gpiod_get(dev, "reset", GPIOD_ASIS); + if (IS_ERR(sc850sl->reset_gpio)) + dev_warn(dev, "Failed to get reset-gpios\n"); + sc850sl->power_gpio = devm_gpiod_get(dev, "power", GPIOD_ASIS); + if (IS_ERR(sc850sl->power_gpio)) + dev_warn(dev, "Failed to get power_gpios\n"); + + sc850sl->pinctrl = devm_pinctrl_get(dev); + if (!IS_ERR(sc850sl->pinctrl)) { + sc850sl->pins_default = + pinctrl_lookup_state(sc850sl->pinctrl, + OF_CAMERA_PINCTRL_STATE_DEFAULT); + if (IS_ERR(sc850sl->pins_default)) + dev_info(dev, "could not get default pinstate\n"); + + sc850sl->pins_sleep = + pinctrl_lookup_state(sc850sl->pinctrl, + OF_CAMERA_PINCTRL_STATE_SLEEP); + if (IS_ERR(sc850sl->pins_sleep)) + dev_info(dev, "could not get sleep pinstate\n"); + } else { + dev_info(dev, "no pinctrl\n"); + } + + ret = sc850sl_configure_regulators(sc850sl); + if (ret) { + dev_err(dev, "Failed to get power regulators\n"); + return ret; + } + + mutex_init(&sc850sl->mutex); + + sd = &sc850sl->subdev; + v4l2_i2c_subdev_init(sd, client, &sc850sl_subdev_ops); + ret = sc850sl_initialize_controls(sc850sl); + if (ret) + goto err_destroy_mutex; + + ret = __sc850sl_power_on(sc850sl); + if (ret) + 
goto err_free_handler; + + ret = sc850sl_check_sensor_id(sc850sl, client); + if (ret) + goto err_power_off; +#ifdef CONFIG_VIDEO_V4L2_SUBDEV_API + sd->internal_ops = &sc850sl_internal_ops; + sd->flags |= V4L2_SUBDEV_FL_HAS_DEVNODE | + V4L2_SUBDEV_FL_HAS_EVENTS; +#endif +#if defined(CONFIG_MEDIA_CONTROLLER) + sc850sl->pad.flags = MEDIA_PAD_FL_SOURCE; + sd->entity.function = MEDIA_ENT_F_CAM_SENSOR; + ret = media_entity_pads_init(&sd->entity, 1, &sc850sl->pad); + if (ret < 0) + goto err_power_off; +#endif + + memset(facing, 0, sizeof(facing)); + if (strcmp(sc850sl->module_facing, "back") == 0) + facing[0] = 'b'; + else + facing[0] = 'f'; + snprintf(sd->name, sizeof(sd->name), "m%02d_%s_%s %s", + sc850sl->module_index, facing, + SC850SL_NAME, dev_name(sd->dev)); + + ret = v4l2_async_register_subdev_sensor_common(sd); + if (ret) { + dev_err(dev, "v4l2 async register subdev failed\n"); + goto err_clean_entity; + } + + pm_runtime_set_active(dev); + pm_runtime_enable(dev); + pm_runtime_idle(dev); + + return 0; + +err_clean_entity: +#if defined(CONFIG_MEDIA_CONTROLLER) + media_entity_cleanup(&sd->entity); +#endif +err_power_off: + __sc850sl_power_off(sc850sl); +err_free_handler: + v4l2_ctrl_handler_free(&sc850sl->ctrl_handler); +err_destroy_mutex: + mutex_destroy(&sc850sl->mutex); + + return ret; +} + +static int sc850sl_remove(struct i2c_client *client) +{ + struct v4l2_subdev *sd = i2c_get_clientdata(client); + struct sc850sl *sc850sl = to_sc850sl(sd); + + v4l2_async_unregister_subdev(sd); +#if defined(CONFIG_MEDIA_CONTROLLER) + media_entity_cleanup(&sd->entity); +#endif + v4l2_ctrl_handler_free(&sc850sl->ctrl_handler); + mutex_destroy(&sc850sl->mutex); + + pm_runtime_disable(&client->dev); + if (!pm_runtime_status_suspended(&client->dev)) + __sc850sl_power_off(sc850sl); + pm_runtime_set_suspended(&client->dev); + + return 0; +} + +#if IS_ENABLED(CONFIG_OF) +static const struct of_device_id sc850sl_of_match[] = { + { .compatible = "smartsens,sc850sl" }, + {}, +}; 
+MODULE_DEVICE_TABLE(of, sc850sl_of_match); +#endif + +static const struct i2c_device_id sc850sl_match_id[] = { + { "smartsens,sc850sl", 0 }, + { }, +}; + +static struct i2c_driver sc850sl_i2c_driver = { + .driver = { + .name = SC850SL_NAME, + .pm = &sc850sl_pm_ops, + .of_match_table = of_match_ptr(sc850sl_of_match), + }, + .probe = &sc850sl_probe, + .remove = &sc850sl_remove, + .id_table = sc850sl_match_id, +}; + +static int __init sensor_mod_init(void) +{ + return i2c_add_driver(&sc850sl_i2c_driver); +} + +static void __exit sensor_mod_exit(void) +{ + i2c_del_driver(&sc850sl_i2c_driver); +} + +device_initcall_sync(sensor_mod_init); +module_exit(sensor_mod_exit); + +MODULE_DESCRIPTION("smartsens,sc850sl sensor driver"); +MODULE_LICENSE("GPL"); From 25b2037af22f36400c9004481b9e44c02c30c8e1 Mon Sep 17 00:00:00 2001 From: Zhen Chen Date: Fri, 10 Feb 2023 12:01:52 +0800 Subject: [PATCH 29/79] MALI: rockchip: upgrade bifrost DDK to g17p0-01eac0, from g15p0-01eac0 In addition, add some more modifications, according to commit ccf3f0670c36 ("MALI: bifrost: from ARM: Remove references to PageMovable()"). Note, the corresponding mali_csffw.bin for DDK g15 MUST be used. 
Change-Id: Ie233cd29d8d169202d5b80b00a97ccb90e6bd3f2 Signed-off-by: Zhen Chen --- .../sysfs-device-mali-coresight-source | 113 +++ Documentation/csf_sync_state_dump.txt | 111 +++ .../devicetree/bindings/arm/mali-bifrost.txt | 1 + .../bindings/arm/mali-coresight-source.txt | 160 +++ Documentation/dma-buf-test-exporter.txt | 4 +- drivers/base/arm/Makefile | 9 +- drivers/base/arm/Mconfig | 64 -- .../memory_group_manager.c | 4 +- drivers/gpu/arm/bifrost/Kbuild | 2 +- drivers/gpu/arm/bifrost/Kconfig | 71 +- drivers/gpu/arm/bifrost/Makefile | 42 +- drivers/gpu/arm/bifrost/Mconfig | 326 ------- drivers/gpu/arm/bifrost/backend/gpu/Kbuild | 8 +- .../backend/gpu/mali_kbase_irq_linux.c | 10 +- .../bifrost/backend/gpu/mali_kbase_jm_as.c | 8 +- .../bifrost/backend/gpu/mali_kbase_jm_hw.c | 46 +- .../backend/gpu/mali_kbase_jm_internal.h | 41 +- .../bifrost/backend/gpu/mali_kbase_jm_rb.c | 113 +-- .../bifrost/backend/gpu/mali_kbase_jm_rb.h | 14 +- .../backend/gpu/mali_kbase_js_backend.c | 5 +- .../backend/gpu/mali_kbase_model_dummy.c | 73 +- .../backend/gpu/mali_kbase_model_dummy.h | 29 +- .../gpu/mali_kbase_model_error_generator.c | 2 +- .../backend/gpu/mali_kbase_model_linux.c | 33 +- .../backend/gpu/mali_kbase_model_linux.h | 125 ++- .../bifrost/backend/gpu/mali_kbase_pm_ca.c | 4 +- .../backend/gpu/mali_kbase_pm_driver.c | 82 +- .../backend/gpu/mali_kbase_pm_internal.h | 23 + .../backend/gpu/mali_kbase_pm_mcu_states.h | 16 +- .../backend/gpu/mali_kbase_pm_metrics.c | 2 +- drivers/gpu/arm/bifrost/build.bp | 5 +- .../context/backend/mali_kbase_context_csf.c | 2 + .../arm/bifrost/context/mali_kbase_context.c | 11 - drivers/gpu/arm/bifrost/csf/Kbuild | 14 +- drivers/gpu/arm/bifrost/csf/mali_kbase_csf.c | 588 +++++------ drivers/gpu/arm/bifrost/csf/mali_kbase_csf.h | 41 +- .../bifrost/csf/mali_kbase_csf_csg_debugfs.c | 24 +- .../bifrost/csf/mali_kbase_csf_csg_debugfs.h | 9 +- .../gpu/arm/bifrost/csf/mali_kbase_csf_defs.h | 105 +- .../arm/bifrost/csf/mali_kbase_csf_firmware.c | 288 
+++++- .../arm/bifrost/csf/mali_kbase_csf_firmware.h | 56 +- .../csf/mali_kbase_csf_firmware_core_dump.c | 807 ++++++++++++++++ .../csf/mali_kbase_csf_firmware_core_dump.h | 65 ++ .../bifrost/csf/mali_kbase_csf_firmware_log.c | 4 +- .../bifrost/csf/mali_kbase_csf_firmware_log.h | 3 + .../csf/mali_kbase_csf_firmware_no_mali.c | 93 +- .../csf/mali_kbase_csf_heap_context_alloc.c | 82 +- .../gpu/arm/bifrost/csf/mali_kbase_csf_kcpu.c | 415 ++++++-- .../gpu/arm/bifrost/csf/mali_kbase_csf_kcpu.h | 22 +- .../csf/mali_kbase_csf_mcu_shared_reg.c | 815 ++++++++++++++++ .../csf/mali_kbase_csf_mcu_shared_reg.h | 139 +++ .../bifrost/csf/mali_kbase_csf_registers.h | 87 +- .../bifrost/csf/mali_kbase_csf_scheduler.c | 212 +++- .../bifrost/csf/mali_kbase_csf_sync_debugfs.c | 788 +++++++++++++++ .../bifrost/csf/mali_kbase_csf_sync_debugfs.h | 37 + .../bifrost/csf/mali_kbase_csf_tiler_heap.c | 82 +- .../csf/mali_kbase_csf_tiler_heap_reclaim.c | 4 + drivers/gpu/arm/bifrost/debug/Kbuild | 3 +- .../backend/mali_kbase_debug_coresight_csf.c | 851 ++++++++++++++++ .../mali_kbase_debug_coresight_internal_csf.h | 182 ++++ .../device/backend/mali_kbase_device_csf.c | 24 +- .../device/backend/mali_kbase_device_hw_csf.c | 8 +- .../device/backend/mali_kbase_device_hw_jm.c | 4 +- .../device/backend/mali_kbase_device_jm.c | 20 +- .../arm/bifrost/device/mali_kbase_device.c | 8 + .../device/mali_kbase_device_internal.h | 12 +- .../mali_kbase_hwcnt_backend_csf_if_fw.c | 12 +- .../backend/mali_kbase_hwcnt_backend_jm.c | 5 +- .../ipa/backend/mali_kbase_ipa_counter_csf.c | 45 +- .../ipa/backend/mali_kbase_ipa_counter_jm.c | 5 +- drivers/gpu/arm/bifrost/ipa/mali_kbase_ipa.c | 30 +- .../arm/bifrost/ipa/mali_kbase_ipa_simple.c | 6 +- .../gpu/arm/bifrost/jm/mali_kbase_jm_defs.h | 2 +- drivers/gpu/arm/bifrost/jm/mali_kbase_jm_js.h | 23 +- .../arm/bifrost/mali_base_hwconfig_features.h | 10 - .../arm/bifrost/mali_base_hwconfig_issues.h | 164 +++- drivers/gpu/arm/bifrost/mali_kbase.h | 30 +- 
.../arm/bifrost/mali_kbase_as_fault_debugfs.c | 10 +- .../gpu/arm/bifrost/mali_kbase_core_linux.c | 59 +- .../gpu/arm/bifrost/mali_kbase_ctx_sched.c | 41 +- .../gpu/arm/bifrost/mali_kbase_ctx_sched.h | 12 +- drivers/gpu/arm/bifrost/mali_kbase_defs.h | 34 +- drivers/gpu/arm/bifrost/mali_kbase_fence.h | 91 +- .../gpu/arm/bifrost/mali_kbase_fence_ops.c | 50 +- drivers/gpu/arm/bifrost/mali_kbase_gpuprops.c | 53 +- drivers/gpu/arm/bifrost/mali_kbase_hw.c | 12 +- .../gpu/arm/bifrost/mali_kbase_hwaccess_jm.h | 17 +- drivers/gpu/arm/bifrost/mali_kbase_jd.c | 21 +- drivers/gpu/arm/bifrost/mali_kbase_jm.c | 18 +- drivers/gpu/arm/bifrost/mali_kbase_js.c | 210 ++-- .../gpu/arm/bifrost/mali_kbase_kinstr_jm.c | 5 + .../arm/bifrost/mali_kbase_kinstr_prfcnt.c | 515 ++++------ drivers/gpu/arm/bifrost/mali_kbase_linux.h | 4 +- drivers/gpu/arm/bifrost/mali_kbase_mem.c | 407 ++++++-- drivers/gpu/arm/bifrost/mali_kbase_mem.h | 148 ++- .../gpu/arm/bifrost/mali_kbase_mem_linux.c | 241 ++++- .../gpu/arm/bifrost/mali_kbase_mem_linux.h | 2 +- .../gpu/arm/bifrost/mali_kbase_mem_migrate.c | 347 ++++++- .../gpu/arm/bifrost/mali_kbase_mem_migrate.h | 7 +- drivers/gpu/arm/bifrost/mali_kbase_mem_pool.c | 84 +- drivers/gpu/arm/bifrost/mali_kbase_softjobs.c | 41 +- drivers/gpu/arm/bifrost/mali_kbase_vinstr.c | 5 + .../bifrost/mmu/backend/mali_kbase_mmu_csf.c | 13 +- .../bifrost/mmu/backend/mali_kbase_mmu_jm.c | 6 +- drivers/gpu/arm/bifrost/mmu/mali_kbase_mmu.c | 914 +++++++++++++++--- drivers/gpu/arm/bifrost/mmu/mali_kbase_mmu.h | 109 ++- .../gpu/arm/bifrost/mmu/mali_kbase_mmu_hw.h | 45 +- .../bifrost/mmu/mali_kbase_mmu_hw_direct.c | 8 + drivers/gpu/arm/bifrost/tests/Mconfig | 73 -- .../bifrost/tests/include/kutf/kutf_helpers.h | 27 +- .../gpu/arm/bifrost/tests/kutf/kutf_helpers.c | 14 +- .../arm/bifrost/tl/mali_kbase_timeline_io.c | 5 + .../gpu/arm/bifrost/tl/mali_kbase_tlstream.h | 12 +- .../arm/bifrost/tl/mali_kbase_tracepoints.c | 32 + .../arm/bifrost/tl/mali_kbase_tracepoints.h | 37 + 
drivers/hwtracing/coresight/mali/Kbuild | 65 ++ drivers/hwtracing/coresight/mali/Kconfig | 47 + drivers/hwtracing/coresight/mali/Makefile | 101 ++ drivers/hwtracing/coresight/mali/build.bp | 100 ++ .../coresight/mali/coresight_mali_common.c | 62 ++ .../coresight/mali/coresight_mali_common.h | 133 +++ .../mali/sources/coresight_mali_sources.c | 168 ++++ .../mali/sources/coresight_mali_sources.h | 94 ++ .../mali/sources/ela/coresight-ela600.h | 129 +++ .../ela/coresight_mali_source_ela_core.c | 666 +++++++++++++ .../etm/coresight_mali_source_etm_core.c | 280 ++++++ .../itm/coresight_mali_source_itm_core.c | 265 +++++ .../linux/mali_kbase_debug_coresight_csf.h | 241 +++++ include/linux/version_compat_defs.h | 6 +- .../backend/gpu/mali_kbase_model_dummy.h | 13 + .../backend/gpu/mali_kbase_model_linux.h | 38 + .../arm/bifrost/csf/mali_base_csf_kernel.h | 18 +- .../arm/bifrost/csf/mali_kbase_csf_ioctl.h | 10 +- .../gpu/arm/bifrost/gpu/mali_kbase_gpu_id.h | 1 - .../gpu/arm/bifrost/jm/mali_kbase_jm_ioctl.h | 10 +- .../gpu/arm/bifrost/mali_kbase_hwcnt_reader.h | 4 +- .../uapi/gpu/arm/bifrost/mali_kbase_ioctl.h | 276 +++--- 137 files changed, 11502 insertions(+), 2737 deletions(-) create mode 100644 Documentation/ABI/testing/sysfs-device-mali-coresight-source create mode 100644 Documentation/csf_sync_state_dump.txt create mode 100644 Documentation/devicetree/bindings/arm/mali-coresight-source.txt delete mode 100644 drivers/base/arm/Mconfig delete mode 100644 drivers/gpu/arm/bifrost/Mconfig create mode 100644 drivers/gpu/arm/bifrost/csf/mali_kbase_csf_firmware_core_dump.c create mode 100644 drivers/gpu/arm/bifrost/csf/mali_kbase_csf_firmware_core_dump.h create mode 100644 drivers/gpu/arm/bifrost/csf/mali_kbase_csf_mcu_shared_reg.c create mode 100644 drivers/gpu/arm/bifrost/csf/mali_kbase_csf_mcu_shared_reg.h create mode 100644 drivers/gpu/arm/bifrost/csf/mali_kbase_csf_sync_debugfs.c create mode 100644 drivers/gpu/arm/bifrost/csf/mali_kbase_csf_sync_debugfs.h create mode 
100644 drivers/gpu/arm/bifrost/debug/backend/mali_kbase_debug_coresight_csf.c create mode 100644 drivers/gpu/arm/bifrost/debug/backend/mali_kbase_debug_coresight_internal_csf.h delete mode 100644 drivers/gpu/arm/bifrost/tests/Mconfig create mode 100644 drivers/hwtracing/coresight/mali/Kbuild create mode 100644 drivers/hwtracing/coresight/mali/Kconfig create mode 100644 drivers/hwtracing/coresight/mali/Makefile create mode 100644 drivers/hwtracing/coresight/mali/build.bp create mode 100644 drivers/hwtracing/coresight/mali/coresight_mali_common.c create mode 100644 drivers/hwtracing/coresight/mali/coresight_mali_common.h create mode 100644 drivers/hwtracing/coresight/mali/sources/coresight_mali_sources.c create mode 100644 drivers/hwtracing/coresight/mali/sources/coresight_mali_sources.h create mode 100644 drivers/hwtracing/coresight/mali/sources/ela/coresight-ela600.h create mode 100644 drivers/hwtracing/coresight/mali/sources/ela/coresight_mali_source_ela_core.c create mode 100644 drivers/hwtracing/coresight/mali/sources/etm/coresight_mali_source_etm_core.c create mode 100644 drivers/hwtracing/coresight/mali/sources/itm/coresight_mali_source_itm_core.c create mode 100644 include/linux/mali_kbase_debug_coresight_csf.h create mode 100644 include/uapi/gpu/arm/bifrost/backend/gpu/mali_kbase_model_linux.h diff --git a/Documentation/ABI/testing/sysfs-device-mali-coresight-source b/Documentation/ABI/testing/sysfs-device-mali-coresight-source new file mode 100644 index 000000000000..a24a88a824e4 --- /dev/null +++ b/Documentation/ABI/testing/sysfs-device-mali-coresight-source @@ -0,0 +1,113 @@ +/* + * + * (C) COPYRIGHT 2022 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation) and any use by you of this program is subject to the terms + * of such GNU licence. 
+ * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + * + */ + +What: /sys/bus/coresight/devices/mali-source-etm/enable_source +Description: + Attribute used to enable Coresight Source ETM. + +What: /sys/bus/coresight/devices/mali-source-etm/is_enabled +Description: + Attribute used to check if Coresight Source ETM is enabled. + +What: /sys/bus/coresight/devices/mali-source-etm/trcconfigr +Description: + Coresight Source ETM trace configuration to enable global + timestamping, and data value tracing. + +What: /sys/bus/coresight/devices/mali-source-etm/trctraceidr +Description: + Coresight Source ETM trace ID. + +What: /sys/bus/coresight/devices/mali-source-etm/trcvdarcctlr +Description: + Coresight Source ETM viewData include/exclude address + range comparators. + +What: /sys/bus/coresight/devices/mali-source-etm/trcviiectlr +Description: + Coresight Source ETM viewInst include and exclude control. + +What: /sys/bus/coresight/devices/mali-source-etm/trcstallctlr +Description: + Coresight Source ETM stall control register. + +What: /sys/bus/coresight/devices/mali-source-itm/enable_source +Description: + Attribute used to enable Coresight Source ITM. + +What: /sys/bus/coresight/devices/mali-source-itm/is_enabled +Description: + Attribute used to check if Coresight Source ITM is enabled.
+ +What: /sys/bus/coresight/devices/mali-source-itm/dwt_ctrl +Description: + Coresight Source DWT configuration: + [0] = 1, enable cycle counter + [4:1] = 4, set PC sample rate of 256 cycles + [8:5] = 1, set initial post count value + [9] = 1, select position of post count tap on the cycle counter + [11:10] = 1, enable sync packets + [12] = 1, enable periodic PC sample packets + +What: /sys/bus/coresight/devices/mali-source-itm/itm_tcr +Description: + Coresight Source ITM configuration: + [0] = 1, Enable ITM + [1] = 1, Enable Time stamp generation + [2] = 1, Enable sync packet transmission + [3] = 1, Enable HW event forwarding + [11:10] = 1, Generate TS request approx every 128 cycles + [22:16] = 1, Trace bus ID + +What: /sys/bus/coresight/devices/mali-source-ela/enable_source +Description: + Attribute used to enable Coresight Source ELA. + +What: /sys/bus/coresight/devices/mali-source-ela/is_enabled +Description: + Attribute used to check if Coresight Source ELA is enabled. + +What: /sys/bus/coresight/devices/mali-source-ela/select +Description: + Coresight Source ELA select trace mode: + [0], NONE + [1], JCN + [2], CEU_EXEC + [3], CEU_CMDS + [4], MCU_AHBP + [5], HOST_AXI + [6], NR_TRACEMODE + + Refer to specification for more details. + +What: /sys/bus/coresight/devices/mali-source-ela/sigmask0 +Description: + Coresight Source ELA SIGMASK0 register set/get. + Refer to specification for more details. + +What: /sys/bus/coresight/devices/mali-source-ela/sigmask4 +Description: + Coresight Source ELA SIGMASK4 register set/get. + Refer to specification for more details. + +What: /sys/bus/coresight/devices/mali-source-ela/sigcomp0 +Description: + Coresight Source ELA SIGCOMP0 register set/get. + Refer to specification for more details. + +What: /sys/bus/coresight/devices/mali-source-ela/sigcomp4 +Description: + Coresight Source ELA SIGCOMP4 register set/get. + Refer to specification for more details.
diff --git a/Documentation/csf_sync_state_dump.txt b/Documentation/csf_sync_state_dump.txt new file mode 100644 index 000000000000..dc1e48774377 --- /dev/null +++ b/Documentation/csf_sync_state_dump.txt @@ -0,0 +1,111 @@ +# SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note +# +# (C) COPYRIGHT 2022 ARM Limited. All rights reserved. +# +# This program is free software and is provided to you under the terms of the +# GNU General Public License version 2 as published by the Free Software +# Foundation, and any use by you of this program is subject to the terms +# of such GNU license. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, you can access it online at +# http://www.gnu.org/licenses/gpl-2.0.html. +# +# + +DebugFS interface: +------------------ + +A new per-kbase-context debugfs file called csf_sync has been implemented +which captures the current KCPU & GPU queue state of the not-yet-completed +operations and displayed through the debugfs file. +This file is at: +======================================================= +/sys/kernel/debug/mali0/ctx/_/csf_sync +======================================================= + +Output Format: +---------------- + +The csf_sync file contains important data for the currently active queues. +This data is formatted into two segments, which are separated by a +pipe character: the common properties and the operation-specific properties. + +Common Properties: +------------------ + +* Queue type: GPU or KCPU. +* kbase context id and the queue id. +* If the queue type is a GPU queue then the group handle is also noted, +in the middle of the other two IDs. The slot value is also dumped. 
+* Execution status, which can either be 'P' for pending or 'S' for started. +* Command type is then output which indicates the type of dependency +(i.e. wait or signal). +* Object address which is a pointer to the sync object that the +command operates on. +* The live value, which is the value of the synchronization object +at the time of dumping. This could help to determine why wait +operations might be blocked. + +Operation-Specific Properties: +------------------------------ + +The operation-specific values for KCPU queue fence operations +are as follows: a unique timeline name, timeline context, and a fence +sequence number. The CQS WAIT and CQS SET are denoted in the sync dump +as their OPERATION counterparts, and therefore show the same operation +specific values; the argument value to wait on or set to, and operation type, +being (by definition) op:gt and op:set for CQS_WAIT and CQS_SET respectively. + +There are only two operation-specific values for operations in GPU queues +which are always shown; the argument value to wait on or set/add to, +and the operation type (set/add) or wait condition (e.g. LE, GT, GE). + +Examples +-------- +GPU Queue Example +------------------ + +The following output is of a GPU queue, from a process that has a KCTX ID of 52, +is in Queue Group (CSG) 0, and has Queue ID 0. It has started and is waiting on +the object at address 0x0000007f81ffc800. The live value is 0, +as is the arg value. 
However, the operation "op" is GT, indicating it's waiting +for the live value to surpass the arg value: + +====================================================================================================================================== +queue:GPU-52-0-0 exec:S cmd:SYNC_WAIT slot:4 obj:0x0000007f81ffc800 live_value:0x0000000000000000 | op:gt arg_value:0x0000000000000000 +====================================================================================================================================== + +The following is an example of GPU queue dump, where the SYNC SET operation +is blocked by the preceding SYNC WAIT operation. This shows two GPU queues, +with the same KCTX ID of 8, Queue Group (CSG) 0, and Queue ID 0. The SYNC WAIT +operation has started, while the SYNC SET is pending, blocked by the SYNC WAIT. +Both operations are on the same slot, 2 and have live value of 0. The SYNC WAIT +is waiting on the object at address 0x0000007f81ffc800, while the SYNC SET will +set the object at address 0x00000000a3bad4fb when it is unblocked. +The operation "op" is GT for the SYNC WAIT, indicating it's waiting for the +live value to surpass the arg value, while the operation and arg value for the +SYNC SET is "set" and "1" respectively: + +====================================================================================================================================== +queue:GPU-8-0-0 exec:S cmd:SYNC_WAIT slot:2 obj:0x0000007f81ffc800 live_value:0x0000000000000000 | op:gt arg_value:0x0000000000000000 +queue:GPU-8-0-0 exec:P cmd:SYNC_SET slot:2 obj:0x00000000a3bad4fb live_value:0x0000000000000000 | op:set arg_value:0x0000000000000001 +====================================================================================================================================== + +KCPU Queue Example +------------------ + +The following is an example of a KCPU queue, from a process that has +a KCTX ID of 0 and has Queue ID 1. 
It has started and is waiting on the +object at address 0x0000007fbf6f2ff8. The live value is currently 0 with +the "op" being GT indicating it is waiting on the live value to +surpass the arg value. + +=============================================================================================================================== +queue:KCPU-0-1 exec:S cmd:CQS_WAIT_OPERATION obj:0x0000007fbf6f2ff8 live_value:0x0000000000000000 | op:gt arg_value: 0x00000000 +=============================================================================================================================== diff --git a/Documentation/devicetree/bindings/arm/mali-bifrost.txt b/Documentation/devicetree/bindings/arm/mali-bifrost.txt index 2b3b1d028ccd..caf2de5e47be 100644 --- a/Documentation/devicetree/bindings/arm/mali-bifrost.txt +++ b/Documentation/devicetree/bindings/arm/mali-bifrost.txt @@ -235,6 +235,7 @@ gpu@0xfc010000 { ... pbha { int_id_override = <2 0x32>, <9 0x05>, <16 0x32>; + propagate_bits = <0x03>; }; ... }; diff --git a/Documentation/devicetree/bindings/arm/mali-coresight-source.txt b/Documentation/devicetree/bindings/arm/mali-coresight-source.txt new file mode 100644 index 000000000000..87a1ce3b3e85 --- /dev/null +++ b/Documentation/devicetree/bindings/arm/mali-coresight-source.txt @@ -0,0 +1,160 @@ +# SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note +# +# (C) COPYRIGHT 2022 ARM Limited. All rights reserved. +# +# This program is free software and is provided to you under the terms of the +# GNU General Public License version 2 as published by the Free Software +# Foundation, and any use by you of this program is subject to the terms +# of such GNU license. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. 
+# +# You should have received a copy of the GNU General Public License +# along with this program; if not, you can access it online at +# http://www.gnu.org/licenses/gpl-2.0.html. +# +# +===================================== +ARM CoreSight Mali Source integration +===================================== + +See Documentation/trace/coresight/coresight.rst for detailed information +about Coresight. + +This documentation will cover Mali specific devicetree integration. + +References to Sink ports are given as examples. Access to Sink is specific +to an implementation and would require dedicated kernel modules. + +ARM Coresight Mali Source ITM +============================= + +Required properties +------------------- + +- compatible: Has to be "arm,coresight-mali-source-itm" +- gpu : phandle to a Mali GPU definition +- port: + - endpoint: + - remote-endpoint: phandle to a Coresight sink port + +Example +------- + +mali-source-itm { + compatible = "arm,coresight-mali-source-itm"; + gpu = <&gpu>; + port { + mali_source_itm_out_port0: endpoint { + remote-endpoint = <&mali_sink_in_port0>; + }; + }; +}; + +ARM Coresight Mali Source ETM +============================= + +Required properties +------------------- + +- compatible: Has to be "arm,coresight-mali-source-etm" +- gpu : phandle to a Mali GPU definition +- port: + - endpoint: + - remote-endpoint: phandle to a Coresight sink port + +Example +------- + +mali-source-etm { + compatible = "arm,coresight-mali-source-etm"; + gpu = <&gpu>; + port { + mali_source_etm_out_port0: endpoint { + remote-endpoint = <&mali_sink_in_port1>; + }; + }; +}; + +ARM Coresight Mali Source ELA +============================= + +Required properties +------------------- + +- compatible: Has to be "arm,coresight-mali-source-ela" +- gpu : phandle to a Mali GPU definition +- signal-groups: Signal groups indexed from 0 to 5. + Used to configure the signal channels. + - sgN: Types of signals attached to one channel. 
+ It can be more than one type in the case of + JCN request/response. + + Types: + - "jcn-request": Can share the channel with "jcn-response" + - "jcn-response": Can share the channel with "jcn-request" + - "ceu-execution": Cannot share the channel with other types + - "ceu-commands": Cannot share the channel with other types + - "mcu-ahbp": Cannot share the channel with other types + - "host-axi": Cannot share the channel with other types + + + If the HW implementation shares a common channel + for JCN response and request (total of 4 channels), + refer to: + - "Example: Shared JCN request/response channel" + Otherwise (total of 5 channels), refer to: + - "Example: Split JCN request/response channel" +- port: + - endpoint: + - remote-endpoint: phandle to a Coresight sink port + +Example: Split JCN request/response channel +-------------------------------------------- + +This example applies to implementations with a total of 5 signal groups, +where JCN request and response are assigned to independent channels. + +mali-source-ela { + compatible = "arm,coresight-mali-source-ela"; + gpu = <&gpu>; + signal-groups { + sg0 = "jcn-request"; + sg1 = "jcn-response"; + sg2 = "ceu-execution"; + sg3 = "ceu-commands"; + sg4 = "mcu-ahbp"; + sg5 = "host-axi"; + }; + port { + mali_source_ela_out_port0: endpoint { + remote-endpoint = <&mali_sink_in_port2>; + }; + }; +}; + +Example: Shared JCN request/response channel +-------------------------------------------- + +This example applies to implementations with a total of 4 signal groups, +where JCN request and response are assigned to the same channel. 
+ +mali-source-ela { + compatible = "arm,coresight-mali-source-ela"; + gpu = <&gpu>; + signal-groups { + sg0 = "jcn-request", "jcn-response"; + sg1 = "ceu-execution"; + sg2 = "ceu-commands"; + sg3 = "mcu-ahbp"; + sg4 = "host-axi"; + }; + port { + mali_source_ela_out_port0: endpoint { + remote-endpoint = <&mali_sink_in_port1>; + }; + }; +}; diff --git a/Documentation/dma-buf-test-exporter.txt b/Documentation/dma-buf-test-exporter.txt index b01020c06751..70a92f7d3e28 100644 --- a/Documentation/dma-buf-test-exporter.txt +++ b/Documentation/dma-buf-test-exporter.txt @@ -1,6 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note # -# (C) COPYRIGHT 2012-2013, 2020-2021 ARM Limited. All rights reserved. +# (C) COPYRIGHT 2012-2013, 2020-2022 ARM Limited. All rights reserved. # # This program is free software and is provided to you under the terms of the # GNU General Public License version 2 as published by the Free Software @@ -38,5 +38,5 @@ The buffers support all of the dma_buf API, including mmap. It supports being compiled as a module both in-tree and out-of-tree. -See include/linux/dma-buf-test-exporter.h for the ioctl interface. +See include/uapi/base/arm/dma_buf_test_exporter/dma-buf-test-exporter.h for the ioctl interface. See Documentation/dma-buf-sharing.txt for details on dma_buf. 
diff --git a/drivers/base/arm/Makefile b/drivers/base/arm/Makefile index c1a61a1106d0..cc4bde71d3e6 100644 --- a/drivers/base/arm/Makefile +++ b/drivers/base/arm/Makefile @@ -90,6 +90,12 @@ EXTRA_CFLAGS := $(foreach config,$(CONFIGS), \ KBUILD_CFLAGS += -Wall -Werror +ifeq ($(CONFIG_GCOV_KERNEL), y) + KBUILD_CFLAGS += $(call cc-option, -ftest-coverage) + KBUILD_CFLAGS += $(call cc-option, -fprofile-arcs) + EXTRA_CFLAGS += -DGCOV_PROFILE=1 +endif + # The following were added to align with W=1 in scripts/Makefile.extrawarn # from the Linux source tree (v5.18.14) KBUILD_CFLAGS += -Wextra -Wunused -Wno-unused-parameter @@ -120,7 +126,8 @@ KBUILD_CFLAGS += -Wdisabled-optimization # global variables. KBUILD_CFLAGS += $(call cc-option, -Wlogical-op) KBUILD_CFLAGS += -Wmissing-field-initializers -KBUILD_CFLAGS += -Wtype-limits +# -Wtype-limits must be disabled due to build failures on kernel 5.x +KBUILD_CFLAGS += -Wno-type-limit KBUILD_CFLAGS += $(call cc-option, -Wmaybe-uninitialized) KBUILD_CFLAGS += $(call cc-option, -Wunused-macros) diff --git a/drivers/base/arm/Mconfig b/drivers/base/arm/Mconfig deleted file mode 100644 index f7787f0ccd34..000000000000 --- a/drivers/base/arm/Mconfig +++ /dev/null @@ -1,64 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note -# -# (C) COPYRIGHT 2021-2022 ARM Limited. All rights reserved. -# -# This program is free software and is provided to you under the terms of the -# GNU General Public License version 2 as published by the Free Software -# Foundation, and any use by you of this program is subject to the terms -# of such GNU license. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. 
-# -# You should have received a copy of the GNU General Public License -# along with this program; if not, you can access it online at -# http://www.gnu.org/licenses/gpl-2.0.html. -# -# - -menuconfig MALI_BASE_MODULES - bool "Mali Base extra modules" - default y if BACKEND_KERNEL - help - Enable this option to build support for a Arm Mali base modules. - Those modules provide extra features or debug interfaces and, - are optional for the use of the Mali GPU modules. - -config DMA_SHARED_BUFFER_TEST_EXPORTER - bool "Build dma-buf framework test exporter module" - depends on MALI_BASE_MODULES - default y - help - This option will build the dma-buf framework test exporter module. - Usable to help test importers. - - Modules: - - dma-buf-test-exporter.ko - -config MALI_MEMORY_GROUP_MANAGER - bool "Build Mali Memory Group Manager module" - depends on MALI_BASE_MODULES - default y - help - This option will build the memory group manager module. - This is an example implementation for allocation and release of pages - for memory pools managed by Mali GPU device drivers. - - Modules: - - memory_group_manager.ko - -config MALI_PROTECTED_MEMORY_ALLOCATOR - bool "Build Mali Protected Memory Allocator module" - depends on MALI_BASE_MODULES && GPU_HAS_CSF - default y - help - This option will build the protected memory allocator module. - This is an example implementation for allocation and release of pages - of secure memory intended to be used by the firmware - of Mali GPU device drivers. 
- - Modules: - - protected_memory_allocator.ko - diff --git a/drivers/base/arm/memory_group_manager/memory_group_manager.c b/drivers/base/arm/memory_group_manager/memory_group_manager.c index 825893e3cf8e..2acb9faf12d0 100644 --- a/drivers/base/arm/memory_group_manager/memory_group_manager.c +++ b/drivers/base/arm/memory_group_manager/memory_group_manager.c @@ -228,8 +228,8 @@ static int mgm_initialize_debugfs(struct mgm_groups *mgm_data) #define ORDER_SMALL_PAGE 0 #define ORDER_LARGE_PAGE 9 -static void update_size(struct memory_group_manager_device *mgm_dev, int - group_id, int order, bool alloc) +static void update_size(struct memory_group_manager_device *mgm_dev, unsigned int group_id, + int order, bool alloc) { struct mgm_groups *data = mgm_dev->data; diff --git a/drivers/gpu/arm/bifrost/Kbuild b/drivers/gpu/arm/bifrost/Kbuild index 70f3997b2bd3..398e102a0af5 100644 --- a/drivers/gpu/arm/bifrost/Kbuild +++ b/drivers/gpu/arm/bifrost/Kbuild @@ -69,7 +69,7 @@ endif # # Driver version string which is returned to userspace via an ioctl -MALI_RELEASE_NAME ?= '"g15p0-01eac0"' +MALI_RELEASE_NAME ?= '"g17p0-01eac0"' # Set up defaults if not defined by build system ifeq ($(CONFIG_MALI_BIFROST_DEBUG), y) MALI_UNIT_TEST = 1 diff --git a/drivers/gpu/arm/bifrost/Kconfig b/drivers/gpu/arm/bifrost/Kconfig index 1bfb59ca14e2..e530e8c85b17 100644 --- a/drivers/gpu/arm/bifrost/Kconfig +++ b/drivers/gpu/arm/bifrost/Kconfig @@ -41,9 +41,30 @@ config MALI_PLATFORM_NAME include in the build. 'platform/$(MALI_PLATFORM_NAME)/Kbuild' must exist. -config MALI_REAL_HW +choice + prompt "Mali HW backend" depends on MALI_BIFROST - def_bool !MALI_BIFROST_NO_MALI + default MALI_REAL_HW + +config MALI_REAL_HW + bool "Enable build of Mali kernel driver for real HW" + depends on MALI_BIFROST + help + This is the default HW backend. 
+ +config MALI_BIFROST_NO_MALI + bool "Enable build of Mali kernel driver for No Mali" + depends on MALI_BIFROST && MALI_BIFROST_EXPERT + help + This can be used to test the driver in a simulated environment + whereby the hardware is not physically present. If the hardware is physically + present it will not be used. This can be used to test the majority of the + driver without needing actual hardware or for software benchmarking. + All calls to the simulated hardware will complete immediately as if the hardware + completed the task. + + +endchoice menu "Platform specific options" source "drivers/gpu/arm/bifrost/platform/Kconfig" @@ -91,6 +112,21 @@ config MALI_BIFROST_ENABLE_TRACE Enables tracing in kbase. Trace log available through the "mali_trace" debugfs file, when the CONFIG_DEBUG_FS is enabled +config MALI_FW_CORE_DUMP + bool "Enable support for FW core dump" + depends on MALI_BIFROST && MALI_CSF_SUPPORT + default n + help + Adds ability to request firmware core dump through the "fw_core_dump" + debugfs file + + Example: + * To explicitly request core dump: + echo 1 > /sys/kernel/debug/mali0/fw_core_dump + * To output current core dump (after explicitly requesting a core dump, + or kernel driver reported an internal firmware error): + cat /sys/kernel/debug/mali0/fw_core_dump + config MALI_ARBITER_SUPPORT bool "Enable arbiter support for Mali" depends on MALI_BIFROST && !MALI_CSF_SUPPORT @@ -127,6 +163,11 @@ config MALI_DMA_BUF_LEGACY_COMPAT flushes in other drivers. This only has an effect for clients using UK 11.18 or older. For later UK versions it is not possible. 
+config MALI_CORESIGHT + depends on MALI_BIFROST && MALI_CSF_SUPPORT && !MALI_BIFROST_NO_MALI + bool "Enable Kbase CoreSight tracing support" + default n + menuconfig MALI_BIFROST_EXPERT depends on MALI_BIFROST bool "Enable Expert Settings" @@ -174,18 +215,6 @@ config MALI_CORESTACK comment "Platform options" depends on MALI_BIFROST && MALI_BIFROST_EXPERT -config MALI_BIFROST_NO_MALI - bool "Enable No Mali" - depends on MALI_BIFROST && MALI_BIFROST_EXPERT - default n - help - This can be used to test the driver in a simulated environment - whereby the hardware is not physically present. If the hardware is physically - present it will not be used. This can be used to test the majority of the - driver without needing actual hardware or for software benchmarking. - All calls to the simulated hardware will complete immediately as if the hardware - completed the task. - config MALI_BIFROST_ERROR_INJECT bool "Enable No Mali error injection" depends on MALI_BIFROST && MALI_BIFROST_EXPERT && MALI_BIFROST_NO_MALI @@ -204,20 +233,6 @@ config MALI_GEM5_BUILD comment "Debug options" depends on MALI_BIFROST && MALI_BIFROST_EXPERT -config MALI_FW_CORE_DUMP - bool "Enable support for FW core dump" - depends on MALI_BIFROST && MALI_BIFROST_EXPERT && MALI_CSF_SUPPORT - default n - help - Adds ability to request firmware core dump - - Example: - * To explicitly request core dump: - echo 1 >/sys/kernel/debug/mali0/fw_core_dump - * To output current core dump (after explicitly requesting a core dump, - or kernel driver reported an internal firmware error): - cat /sys/kernel/debug/mali0/fw_core_dump - config MALI_BIFROST_DEBUG bool "Enable debug build" depends on MALI_BIFROST && MALI_BIFROST_EXPERT diff --git a/drivers/gpu/arm/bifrost/Makefile b/drivers/gpu/arm/bifrost/Makefile index 3fb736d7950e..dfe96d8c37e7 100644 --- a/drivers/gpu/arm/bifrost/Makefile +++ b/drivers/gpu/arm/bifrost/Makefile @@ -39,17 +39,10 @@ ifeq ($(CONFIG_MALI_BIFROST),m) CONFIG_MALI_ARBITRATION ?= n 
CONFIG_MALI_PARTITION_MANAGER ?= n - ifeq ($(origin CONFIG_MALI_ABITER_MODULES), undefined) - CONFIG_MALI_ARBITER_MODULES := $(CONFIG_MALI_ARBITRATION) - endif - - ifeq ($(origin CONFIG_MALI_GPU_POWER_MODULES), undefined) - CONFIG_MALI_GPU_POWER_MODULES := $(CONFIG_MALI_ARBITRATION) - endif - ifneq ($(CONFIG_MALI_BIFROST_NO_MALI),y) # Prevent misuse when CONFIG_MALI_BIFROST_NO_MALI CONFIG_MALI_REAL_HW ?= y + CONFIG_MALI_CORESIGHT = n endif ifeq ($(CONFIG_MALI_BIFROST_DVFS),y) @@ -64,10 +57,11 @@ ifeq ($(CONFIG_MALI_BIFROST),m) CONFIG_MALI_DMA_BUF_LEGACY_COMPAT = n endif - ifeq ($(CONFIG_XEN),y) - ifneq ($(CONFIG_MALI_ARBITER_SUPPORT), n) - CONFIG_MALI_XEN ?= m - endif + ifeq ($(CONFIG_MALI_CSF_SUPPORT), y) + CONFIG_MALI_FW_CORE_DUMP ?= y + CONFIG_MALI_CORESIGHT ?= n + else + CONFIG_MALI_FW_CORE_DUMP ?= n endif # @@ -76,12 +70,14 @@ ifeq ($(CONFIG_MALI_BIFROST),m) ifeq ($(CONFIG_MALI_BIFROST_EXPERT), y) ifeq ($(CONFIG_MALI_BIFROST_NO_MALI), y) CONFIG_MALI_REAL_HW = n + else # Prevent misuse when CONFIG_MALI_BIFROST_NO_MALI=n CONFIG_MALI_REAL_HW = y CONFIG_MALI_BIFROST_ERROR_INJECT = n endif + ifeq ($(CONFIG_MALI_HW_ERRATA_1485982_NOT_AFFECTED), y) # Prevent misuse when CONFIG_MALI_HW_ERRATA_1485982_NOT_AFFECTED=y CONFIG_MALI_HW_ERRATA_1485982_USE_CLOCK_ALTERNATIVE = n @@ -143,12 +139,11 @@ ifeq ($(CONFIG_MALI_BIFROST),m) else # Prevent misuse when CONFIG_MALI_BIFROST=n CONFIG_MALI_ARBITRATION = n - CONFIG_MALI_ARBITER_MODULES = n - CONFIG_MALI_GPU_POWER_MODULES = n CONFIG_MALI_KUTF = n CONFIG_MALI_KUTF_IRQ_TEST = n CONFIG_MALI_KUTF_CLK_RATE_TRACE = n CONFIG_MALI_KUTF_MGM_INTEGRATION_TEST = n + CONFIG_MALI_FW_CORE_DUMP = n endif # All Mali CONFIG should be listed here @@ -158,8 +153,6 @@ CONFIGS := \ CONFIG_MALI_BIFROST_GATOR_SUPPORT \ CONFIG_MALI_ARBITER_SUPPORT \ CONFIG_MALI_ARBITRATION \ - CONFIG_MALI_ARBITER_MODULES \ - CONFIG_MALI_GPU_POWER_MODULES \ CONFIG_MALI_PARTITION_MANAGER \ CONFIG_MALI_REAL_HW \ CONFIG_MALI_GEM5_BUILD \ @@ -189,10 +182,14 @@ CONFIGS := \ 
CONFIG_MALI_KUTF_IRQ_TEST \ CONFIG_MALI_KUTF_CLK_RATE_TRACE \ CONFIG_MALI_KUTF_MGM_INTEGRATION_TEST \ - CONFIG_MALI_XEN + CONFIG_MALI_XEN \ + CONFIG_MALI_FW_CORE_DUMP \ + CONFIG_MALI_CORESIGHT -# +THIS_DIR := $(dir $(lastword $(MAKEFILE_LIST))) +-include $(THIS_DIR)/../arbitration/Makefile + # MAKE_ARGS to pass the custom CONFIGs on out-of-tree build # # Generate the list of CONFIGs and values. @@ -254,7 +251,8 @@ KBUILD_CFLAGS += -Wdisabled-optimization # global variables. KBUILD_CFLAGS += $(call cc-option, -Wlogical-op) KBUILD_CFLAGS += -Wmissing-field-initializers -KBUILD_CFLAGS += -Wtype-limits +# -Wtype-limits must be disabled due to build failures on kernel 5.x +KBUILD_CFLAGS += -Wno-type-limit KBUILD_CFLAGS += $(call cc-option, -Wmaybe-uninitialized) KBUILD_CFLAGS += $(call cc-option, -Wunused-macros) @@ -263,6 +261,12 @@ KBUILD_CPPFLAGS += -DKBUILD_EXTRA_WARN2 # This warning is disabled to avoid build failures in some kernel versions KBUILD_CFLAGS += -Wno-ignored-qualifiers +ifeq ($(CONFIG_GCOV_KERNEL),y) + KBUILD_CFLAGS += $(call cc-option, -ftest-coverage) + KBUILD_CFLAGS += $(call cc-option, -fprofile-arcs) + EXTRA_CFLAGS += -DGCOV_PROFILE=1 +endif + all: $(MAKE) -C $(KDIR) M=$(CURDIR) $(MAKE_ARGS) EXTRA_CFLAGS="$(EXTRA_CFLAGS)" KBUILD_EXTRA_SYMBOLS="$(EXTRA_SYMBOLS)" modules diff --git a/drivers/gpu/arm/bifrost/Mconfig b/drivers/gpu/arm/bifrost/Mconfig deleted file mode 100644 index f812bcad639c..000000000000 --- a/drivers/gpu/arm/bifrost/Mconfig +++ /dev/null @@ -1,326 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note -# -# (C) COPYRIGHT 2012-2022 ARM Limited. All rights reserved. -# -# This program is free software and is provided to you under the terms of the -# GNU General Public License version 2 as published by the Free Software -# Foundation, and any use by you of this program is subject to the terms -# of such GNU license. 
-# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program; if not, you can access it online at -# http://www.gnu.org/licenses/gpl-2.0.html. -# -# - -menuconfig MALI_BIFROST - bool "Mali Midgard series support" - default y - help - Enable this option to build support for a ARM Mali Midgard GPU. - - To compile this driver as a module, choose M here: - this will generate a single module, called mali_kbase. - -config MALI_PLATFORM_NAME - depends on MALI_BIFROST - string "Platform name" - default "hisilicon" if PLATFORM_HIKEY960 - default "hisilicon" if PLATFORM_HIKEY970 - default "devicetree" - help - Enter the name of the desired platform configuration directory to - include in the build. 'platform/$(MALI_PLATFORM_NAME)/Kbuild' must - exist. - - When PLATFORM_CUSTOM is set, this needs to be set manually to - pick up the desired platform files. - -config MALI_REAL_HW - bool - depends on MALI_BIFROST - default y - default n if NO_MALI - -config MALI_PLATFORM_DT_PIN_RST - bool "Enable Juno GPU Pin reset" - depends on MALI_BIFROST - default n - default y if BUSLOG - help - Enables support for GPUs pin reset on Juno platforms. - -config MALI_CSF_SUPPORT - bool "Enable Mali CSF based GPU support" - depends on MALI_BIFROST - default y if GPU_HAS_CSF - help - Enables support for CSF based GPUs. - -config MALI_BIFROST_DEVFREQ - bool "Enable devfreq support for Mali" - depends on MALI_BIFROST - default y - help - Support devfreq for Mali. - - Using the devfreq framework and, by default, the simple on-demand - governor, the frequency of Mali will be dynamically selected from the - available OPPs. 
- -config MALI_BIFROST_DVFS - bool "Enable legacy DVFS" - depends on MALI_BIFROST && !MALI_BIFROST_DEVFREQ - default n - help - Choose this option to enable legacy DVFS in the Mali Midgard DDK. - -config MALI_BIFROST_GATOR_SUPPORT - bool "Enable Streamline tracing support" - depends on MALI_BIFROST && !BACKEND_USER - default y - help - Enables kbase tracing used by the Arm Streamline Performance Analyzer. - The tracepoints are used to derive GPU activity charts in Streamline. - -config MALI_BIFROST_ENABLE_TRACE - bool "Enable kbase tracing" - depends on MALI_BIFROST - default y if MALI_BIFROST_DEBUG - default n - help - Enables tracing in kbase. Trace log available through - the "mali_trace" debugfs file, when the CONFIG_DEBUG_FS is enabled - -config MALI_ARBITER_SUPPORT - bool "Enable arbiter support for Mali" - depends on MALI_BIFROST && !MALI_CSF_SUPPORT - default n - help - Enable support for the arbiter interface in the driver. - This allows an external arbiter to manage driver access - to GPU hardware in a virtualized environment - - If unsure, say N. - -config DMA_BUF_SYNC_IOCTL_SUPPORTED - bool "Enable Kernel DMA buffers support DMA_BUF_IOCTL_SYNC" - depends on MALI_BIFROST && BACKEND_KERNEL - default y - -config MALI_DMA_BUF_MAP_ON_DEMAND - bool "Enable map imported dma-bufs on demand" - depends on MALI_BIFROST - default n - default y if !DMA_BUF_SYNC_IOCTL_SUPPORTED - help - This option will cause kbase to set up the GPU mapping of imported - dma-buf when needed to run atoms. This is the legacy behavior. - - This is intended for testing and the option will get removed in the - future. 
- -config MALI_DMA_BUF_LEGACY_COMPAT - bool "Enable legacy compatibility cache flush on dma-buf map" - depends on MALI_BIFROST && !MALI_DMA_BUF_MAP_ON_DEMAND - default n - help - This option enables compatibility with legacy dma-buf mapping - behavior, then the dma-buf is mapped on import, by adding cache - maintenance where MALI_DMA_BUF_MAP_ON_DEMAND would do the mapping, - including a cache flush. - - This option might work-around issues related to missing cache - flushes in other drivers. This only has an effect for clients using - UK 11.18 or older. For later UK versions it is not possible. - -menuconfig MALI_BIFROST_EXPERT - depends on MALI_BIFROST - bool "Enable Expert Settings" - default y - help - Enabling this option and modifying the default settings may produce - a driver with performance or other limitations. - -config MALI_MEMORY_FULLY_BACKED - bool "Enable memory fully physically-backed" - depends on MALI_BIFROST && MALI_BIFROST_EXPERT - default n - help - This option enables full physical backing of all virtual - memory allocations in the kernel. Notice that this build - option only affects allocations of grow-on-GPU-page-fault - memory. - -config MALI_CORESTACK - bool "Enable support of GPU core stack power control" - depends on MALI_BIFROST && MALI_BIFROST_EXPERT - default n - help - Enabling this feature on supported GPUs will let the driver powering - on/off the GPU core stack independently without involving the Power - Domain Controller. This should only be enabled on platforms which - integration of the PDC to the Mali GPU is known to be problematic. - This feature is currently only supported on t-Six and t-HEx GPUs. - - If unsure, say N. 
- -config MALI_FW_CORE_DUMP - bool "Enable support for FW core dump" - depends on MALI_BIFROST && MALI_BIFROST_EXPERT && MALI_CSF_SUPPORT - default n - help - Adds ability to request firmware core dump - - Example: - * To explicitly request core dump: - echo 1 >/sys/kernel/debug/mali0/fw_core_dump - * To output current core dump (after explicitly requesting a core dump, - or kernel driver reported an internal firmware error): - cat /sys/kernel/debug/mali0/fw_core_dump - -choice - prompt "Error injection level" - depends on MALI_BIFROST && MALI_BIFROST_EXPERT - default MALI_ERROR_INJECT_NONE - help - Enables insertion of errors to test module failure and recovery mechanisms. - -config MALI_ERROR_INJECT_NONE - bool "disabled" - depends on MALI_BIFROST && MALI_BIFROST_EXPERT - help - Error injection is disabled. - -config MALI_ERROR_INJECT_TRACK_LIST - bool "error track list" - depends on MALI_BIFROST && MALI_BIFROST_EXPERT && NO_MALI - help - Errors to inject are pre-configured by the user. - -config MALI_ERROR_INJECT_RANDOM - bool "random error injection" - depends on MALI_BIFROST && MALI_BIFROST_EXPERT && NO_MALI - help - Injected errors are random, rather than user-driven. - -endchoice - -config MALI_ERROR_INJECT_ON - string - depends on MALI_BIFROST && MALI_BIFROST_EXPERT - default "0" if MALI_ERROR_INJECT_NONE - default "1" if MALI_ERROR_INJECT_TRACK_LIST - default "2" if MALI_ERROR_INJECT_RANDOM - -config MALI_BIFROST_ERROR_INJECT - bool - depends on MALI_BIFROST && MALI_BIFROST_EXPERT - default y if !MALI_ERROR_INJECT_NONE - -config MALI_GEM5_BUILD - bool "Enable build of Mali kernel driver for GEM5" - depends on MALI_BIFROST && MALI_BIFROST_EXPERT - default n - help - This option is to do a Mali GEM5 build. - If unsure, say N. - -config MALI_BIFROST_DEBUG - bool "Enable debug build" - depends on MALI_BIFROST && MALI_BIFROST_EXPERT - default y if DEBUG - default n - help - Select this option for increased checking and reporting of errors. 
- -config MALI_BIFROST_FENCE_DEBUG - bool "Enable debug sync fence usage" - depends on MALI_BIFROST && MALI_BIFROST_EXPERT - default y if MALI_BIFROST_DEBUG - help - Select this option to enable additional checking and reporting on the - use of sync fences in the Mali driver. - - This will add a 3s timeout to all sync fence waits in the Mali - driver, so that when work for Mali has been waiting on a sync fence - for a long time a debug message will be printed, detailing what fence - is causing the block, and which dependent Mali atoms are blocked as a - result of this. - - The timeout can be changed at runtime through the js_soft_timeout - device attribute, where the timeout is specified in milliseconds. - -config MALI_BIFROST_SYSTEM_TRACE - bool "Enable system event tracing support" - depends on MALI_BIFROST && MALI_BIFROST_EXPERT - default y if MALI_BIFROST_DEBUG - default n - help - Choose this option to enable system trace events for each - kbase event. This is typically used for debugging but has - minimal overhead when not in use. Enable only if you know what - you are doing. - -# Instrumentation options. - -# config MALI_PRFCNT_SET_PRIMARY exists in the Kernel Kconfig but is configured using CINSTR_PRIMARY_HWC in Mconfig. -# config MALI_BIFROST_PRFCNT_SET_SECONDARY exists in the Kernel Kconfig but is configured using CINSTR_SECONDARY_HWC in Mconfig. -# config MALI_PRFCNT_SET_TERTIARY exists in the Kernel Kconfig but is configured using CINSTR_TERTIARY_HWC in Mconfig. -# config MALI_PRFCNT_SET_SELECT_VIA_DEBUG_FS exists in the Kernel Kconfig but is configured using CINSTR_HWC_SET_SELECT_VIA_DEBUG_FS in Mconfig. - -config MALI_JOB_DUMP - bool "Enable system level support needed for job dumping" - depends on MALI_BIFROST && MALI_BIFROST_EXPERT - default n - help - Choose this option to enable system level support needed for - job dumping. This is typically used for instrumentation but has - minimal overhead when not in use. 
Enable only if you know what - you are doing. - -config MALI_PWRSOFT_765 - bool "Enable workaround for PWRSOFT-765" - depends on MALI_BIFROST && MALI_BIFROST_EXPERT - default n - help - PWRSOFT-765 fixes devfreq cooling devices issues. The fix was merged - in kernel v4.10, however if backported into the kernel then this - option must be manually selected. - - If using kernel >= v4.10 then say N, otherwise if devfreq cooling - changes have been backported say Y to avoid compilation errors. - - -config MALI_HW_ERRATA_1485982_NOT_AFFECTED - bool "Disable workaround for BASE_HW_ISSUE_GPU2017_1336" - depends on MALI_BIFROST && MALI_BIFROST_EXPERT - default n - default y if PLATFORM_JUNO - help - This option disables the default workaround for GPU2017-1336. The - workaround keeps the L2 cache powered up except for powerdown and reset. - - The workaround introduces a limitation that will prevent the running of - protected mode content on fully coherent platforms, as the switch to IO - coherency mode requires the L2 to be turned off. - -config MALI_HW_ERRATA_1485982_USE_CLOCK_ALTERNATIVE - bool "Use alternative workaround for BASE_HW_ISSUE_GPU2017_1336" - depends on MALI_BIFROST && MALI_BIFROST_EXPERT && !MALI_HW_ERRATA_1485982_NOT_AFFECTED - default n - help - This option uses an alternative workaround for GPU2017-1336. Lowering - the GPU clock to a, platform specific, known good frequeuncy before - powering down the L2 cache. The clock can be specified in the device - tree using the property, opp-mali-errata-1485982. Otherwise the - slowest clock will be selected. 
- - -source "kernel/drivers/gpu/arm/arbitration/Mconfig" -source "kernel/drivers/gpu/arm/midgard/tests/Mconfig" diff --git a/drivers/gpu/arm/bifrost/backend/gpu/Kbuild b/drivers/gpu/arm/bifrost/backend/gpu/Kbuild index 7eec91ff6631..efebc8a544d1 100644 --- a/drivers/gpu/arm/bifrost/backend/gpu/Kbuild +++ b/drivers/gpu/arm/bifrost/backend/gpu/Kbuild @@ -47,8 +47,12 @@ endif bifrost_kbase-$(CONFIG_MALI_BIFROST_DEVFREQ) += \ backend/gpu/mali_kbase_devfreq.o -# Dummy model +ifneq ($(CONFIG_MALI_REAL_HW),y) + bifrost_kbase-y += backend/gpu/mali_kbase_model_linux.o +endif + +# NO_MALI Dummy model interface bifrost_kbase-$(CONFIG_MALI_BIFROST_NO_MALI) += backend/gpu/mali_kbase_model_dummy.o -bifrost_kbase-$(CONFIG_MALI_BIFROST_NO_MALI) += backend/gpu/mali_kbase_model_linux.o # HW error simulation bifrost_kbase-$(CONFIG_MALI_BIFROST_NO_MALI) += backend/gpu/mali_kbase_model_error_generator.o + diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_irq_linux.c b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_irq_linux.c index 15999cbc9126..eb63b2c56c3d 100644 --- a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_irq_linux.c +++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_irq_linux.c @@ -25,12 +25,12 @@ #include -#if !IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI) +#if IS_ENABLED(CONFIG_MALI_REAL_HW) /* GPU IRQ Tags */ -#define JOB_IRQ_TAG 0 -#define MMU_IRQ_TAG 1 -#define GPU_IRQ_TAG 2 +#define JOB_IRQ_TAG 0 +#define MMU_IRQ_TAG 1 +#define GPU_IRQ_TAG 2 static void *kbase_tag(void *ptr, u32 tag) { @@ -500,4 +500,4 @@ void kbase_synchronize_irqs(struct kbase_device *kbdev) KBASE_EXPORT_TEST_API(kbase_synchronize_irqs); -#endif /* !IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI) */ +#endif /* IS_ENABLED(CONFIG_MALI_REAL_HW) */ diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_jm_as.c b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_jm_as.c index 888aa59421a7..258dc6dac6c5 100644 --- a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_jm_as.c +++ 
b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_jm_as.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2014-2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2022 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -67,9 +67,8 @@ static void assign_and_activate_kctx_addr_space(struct kbase_device *kbdev, kbase_js_runpool_inc_context_count(kbdev, kctx); } -bool kbase_backend_use_ctx_sched(struct kbase_device *kbdev, - struct kbase_context *kctx, - int js) +bool kbase_backend_use_ctx_sched(struct kbase_device *kbdev, struct kbase_context *kctx, + unsigned int js) { int i; @@ -240,4 +239,3 @@ bool kbase_backend_use_ctx(struct kbase_device *kbdev, return true; } - diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_jm_hw.c b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_jm_hw.c index e17014e45f6b..ab27e8bde40e 100644 --- a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_jm_hw.c +++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_jm_hw.c @@ -44,9 +44,8 @@ static void kbasep_try_reset_gpu_early_locked(struct kbase_device *kbdev); static u64 kbasep_apply_limited_core_mask(const struct kbase_device *kbdev, const u64 affinity, const u64 limited_core_mask); -static u64 kbase_job_write_affinity(struct kbase_device *kbdev, - base_jd_core_req core_req, - int js, const u64 limited_core_mask) +static u64 kbase_job_write_affinity(struct kbase_device *kbdev, base_jd_core_req core_req, + unsigned int js, const u64 limited_core_mask) { u64 affinity; bool skip_affinity_check = false; @@ -191,7 +190,7 @@ static u64 select_job_chain(struct kbase_jd_atom *katom) return jc; } -int kbase_job_hw_submit(struct kbase_device *kbdev, struct kbase_jd_atom *katom, int js) +int kbase_job_hw_submit(struct kbase_device *kbdev, struct kbase_jd_atom *katom, unsigned int js) { struct 
kbase_context *kctx; u32 cfg; @@ -344,10 +343,8 @@ int kbase_job_hw_submit(struct kbase_device *kbdev, struct kbase_jd_atom *katom, * work out the best estimate (which might still result in an over-estimate to * the calculated time spent) */ -static void kbasep_job_slot_update_head_start_timestamp( - struct kbase_device *kbdev, - int js, - ktime_t end_timestamp) +static void kbasep_job_slot_update_head_start_timestamp(struct kbase_device *kbdev, unsigned int js, + ktime_t end_timestamp) { ktime_t timestamp_diff; struct kbase_jd_atom *katom; @@ -377,8 +374,7 @@ static void kbasep_job_slot_update_head_start_timestamp( * Make a tracepoint call to the instrumentation module informing that * softstop happened on given lpu (job slot). */ -static void kbasep_trace_tl_event_lpu_softstop(struct kbase_device *kbdev, - int js) +static void kbasep_trace_tl_event_lpu_softstop(struct kbase_device *kbdev, unsigned int js) { KBASE_TLSTREAM_TL_EVENT_LPU_SOFTSTOP( kbdev, @@ -387,7 +383,6 @@ static void kbasep_trace_tl_event_lpu_softstop(struct kbase_device *kbdev, void kbase_job_done(struct kbase_device *kbdev, u32 done) { - int i; u32 count = 0; ktime_t end_timestamp; @@ -398,6 +393,7 @@ void kbase_job_done(struct kbase_device *kbdev, u32 done) end_timestamp = ktime_get_raw(); while (done) { + unsigned int i; u32 failed = done >> 16; /* treat failed slots as finished slots */ @@ -407,8 +403,6 @@ void kbase_job_done(struct kbase_device *kbdev, u32 done) * numbered interrupts before the higher numbered ones. 
*/ i = ffs(finished) - 1; - if (WARN(i < 0, "%s: called without receiving any interrupts\n", __func__)) - break; do { int nr_done; @@ -607,11 +601,9 @@ void kbase_job_done(struct kbase_device *kbdev, u32 done) KBASE_KTRACE_ADD_JM(kbdev, JM_IRQ_END, NULL, NULL, 0, count); } -void kbasep_job_slot_soft_or_hard_stop_do_action(struct kbase_device *kbdev, - int js, - u32 action, - base_jd_core_req core_reqs, - struct kbase_jd_atom *target_katom) +void kbasep_job_slot_soft_or_hard_stop_do_action(struct kbase_device *kbdev, unsigned int js, + u32 action, base_jd_core_req core_reqs, + struct kbase_jd_atom *target_katom) { #if KBASE_KTRACE_ENABLE u32 status_reg_before; @@ -669,6 +661,10 @@ void kbasep_job_slot_soft_or_hard_stop_do_action(struct kbase_device *kbdev, struct kbase_context *head_kctx; head = kbase_gpu_inspect(kbdev, js, 0); + if (unlikely(!head)) { + dev_err(kbdev->dev, "Can't get a katom from js(%d)\n", js); + return; + } head_kctx = head->kctx; if (status_reg_before == BASE_JD_EVENT_ACTIVE) @@ -737,7 +733,7 @@ void kbasep_job_slot_soft_or_hard_stop_do_action(struct kbase_device *kbdev, void kbase_backend_jm_kill_running_jobs_from_kctx(struct kbase_context *kctx) { struct kbase_device *kbdev = kctx->kbdev; - int i; + unsigned int i; lockdep_assert_held(&kbdev->hwaccess_lock); @@ -749,7 +745,7 @@ void kbase_job_slot_ctx_priority_check_locked(struct kbase_context *kctx, struct kbase_jd_atom *target_katom) { struct kbase_device *kbdev; - int target_js = target_katom->slot_nr; + unsigned int target_js = target_katom->slot_nr; int i; bool stop_sent = false; @@ -927,8 +923,8 @@ KBASE_EXPORT_TEST_API(kbase_job_slot_term); * * Where possible any job in the next register is evicted before the soft-stop. 
*/ -void kbase_job_slot_softstop_swflags(struct kbase_device *kbdev, int js, - struct kbase_jd_atom *target_katom, u32 sw_flags) +void kbase_job_slot_softstop_swflags(struct kbase_device *kbdev, unsigned int js, + struct kbase_jd_atom *target_katom, u32 sw_flags) { dev_dbg(kbdev->dev, "Soft-stop atom %pK with flags 0x%x (s:%d)\n", target_katom, sw_flags, js); @@ -948,8 +944,8 @@ void kbase_job_slot_softstop(struct kbase_device *kbdev, int js, kbase_job_slot_softstop_swflags(kbdev, js, target_katom, 0u); } -void kbase_job_slot_hardstop(struct kbase_context *kctx, int js, - struct kbase_jd_atom *target_katom) +void kbase_job_slot_hardstop(struct kbase_context *kctx, unsigned int js, + struct kbase_jd_atom *target_katom) { struct kbase_device *kbdev = kctx->kbdev; bool stopped; @@ -1258,7 +1254,7 @@ static enum hrtimer_restart kbasep_reset_timer_callback(struct hrtimer *timer) static void kbasep_try_reset_gpu_early_locked(struct kbase_device *kbdev) { - int i; + unsigned int i; int pending_jobs = 0; /* Count the number of jobs */ diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_jm_internal.h b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_jm_internal.h index 1ebb8434046c..e4cff1f1e59c 100644 --- a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_jm_internal.h +++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_jm_internal.h @@ -33,21 +33,6 @@ #include #include -/** - * kbase_job_submit_nolock() - Submit a job to a certain job-slot - * @kbdev: Device pointer - * @katom: Atom to submit - * @js: Job slot to submit on - * - * The caller must check kbasep_jm_is_submit_slots_free() != false before - * calling this. 
- * - * The following locking conditions are made on the caller: - * - it must hold the hwaccess_lock - */ -void kbase_job_submit_nolock(struct kbase_device *kbdev, - struct kbase_jd_atom *katom, int js); - /** * kbase_job_done_slot() - Complete the head job on a particular job-slot * @kbdev: Device pointer @@ -60,17 +45,16 @@ void kbase_job_done_slot(struct kbase_device *kbdev, int s, u32 completion_code, u64 job_tail, ktime_t *end_timestamp); #if IS_ENABLED(CONFIG_GPU_TRACEPOINTS) -static inline char *kbasep_make_job_slot_string(int js, char *js_string, - size_t js_size) +static inline char *kbasep_make_job_slot_string(unsigned int js, char *js_string, size_t js_size) { - snprintf(js_string, js_size, "job_slot_%i", js); + snprintf(js_string, js_size, "job_slot_%u", js); return js_string; } #endif #if !MALI_USE_CSF -static inline int kbasep_jm_is_js_free(struct kbase_device *kbdev, int js, - struct kbase_context *kctx) +static inline int kbasep_jm_is_js_free(struct kbase_device *kbdev, unsigned int js, + struct kbase_context *kctx) { return !kbase_reg_read(kbdev, JOB_SLOT_REG(js, JS_COMMAND_NEXT)); } @@ -90,7 +74,7 @@ static inline int kbasep_jm_is_js_free(struct kbase_device *kbdev, int js, * * Return: 0 if the job was successfully submitted to hardware, an error otherwise. 
*/ -int kbase_job_hw_submit(struct kbase_device *kbdev, struct kbase_jd_atom *katom, int js); +int kbase_job_hw_submit(struct kbase_device *kbdev, struct kbase_jd_atom *katom, unsigned int js); #if !MALI_USE_CSF /** @@ -106,11 +90,9 @@ int kbase_job_hw_submit(struct kbase_device *kbdev, struct kbase_jd_atom *katom, * The following locking conditions are made on the caller: * - it must hold the hwaccess_lock */ -void kbasep_job_slot_soft_or_hard_stop_do_action(struct kbase_device *kbdev, - int js, - u32 action, - base_jd_core_req core_reqs, - struct kbase_jd_atom *target_katom); +void kbasep_job_slot_soft_or_hard_stop_do_action(struct kbase_device *kbdev, unsigned int js, + u32 action, base_jd_core_req core_reqs, + struct kbase_jd_atom *target_katom); #endif /* !MALI_USE_CSF */ /** @@ -134,11 +116,8 @@ void kbasep_job_slot_soft_or_hard_stop_do_action(struct kbase_device *kbdev, * * Return: true if an atom was stopped, false otherwise */ -bool kbase_backend_soft_hard_stop_slot(struct kbase_device *kbdev, - struct kbase_context *kctx, - int js, - struct kbase_jd_atom *katom, - u32 action); +bool kbase_backend_soft_hard_stop_slot(struct kbase_device *kbdev, struct kbase_context *kctx, + unsigned int js, struct kbase_jd_atom *katom, u32 action); /** * kbase_job_slot_init - Initialise job slot framework diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_jm_rb.c b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_jm_rb.c index e5af4ca8fc43..388b37f36a9d 100644 --- a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_jm_rb.c +++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_jm_rb.c @@ -93,9 +93,8 @@ static void kbase_gpu_enqueue_atom(struct kbase_device *kbdev, * * Return: Atom removed from ringbuffer */ -static struct kbase_jd_atom *kbase_gpu_dequeue_atom(struct kbase_device *kbdev, - int js, - ktime_t *end_timestamp) +static struct kbase_jd_atom *kbase_gpu_dequeue_atom(struct kbase_device *kbdev, unsigned int js, + ktime_t *end_timestamp) { struct slot_rb *rb = 
&kbdev->hwaccess.backend.slot_rb[js]; struct kbase_jd_atom *katom; @@ -118,8 +117,7 @@ static struct kbase_jd_atom *kbase_gpu_dequeue_atom(struct kbase_device *kbdev, return katom; } -struct kbase_jd_atom *kbase_gpu_inspect(struct kbase_device *kbdev, int js, - int idx) +struct kbase_jd_atom *kbase_gpu_inspect(struct kbase_device *kbdev, unsigned int js, int idx) { struct slot_rb *rb = &kbdev->hwaccess.backend.slot_rb[js]; @@ -131,8 +129,7 @@ struct kbase_jd_atom *kbase_gpu_inspect(struct kbase_device *kbdev, int js, return rb->entries[(rb->read_idx + idx) & SLOT_RB_MASK].katom; } -struct kbase_jd_atom *kbase_backend_inspect_tail(struct kbase_device *kbdev, - int js) +struct kbase_jd_atom *kbase_backend_inspect_tail(struct kbase_device *kbdev, unsigned int js) { struct slot_rb *rb = &kbdev->hwaccess.backend.slot_rb[js]; @@ -144,12 +141,13 @@ struct kbase_jd_atom *kbase_backend_inspect_tail(struct kbase_device *kbdev, bool kbase_gpu_atoms_submitted_any(struct kbase_device *kbdev) { - int js; - int i; + unsigned int js; lockdep_assert_held(&kbdev->hwaccess_lock); for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) { + int i; + for (i = 0; i < SLOT_RB_SIZE; i++) { struct kbase_jd_atom *katom = kbase_gpu_inspect(kbdev, js, i); @@ -160,7 +158,7 @@ bool kbase_gpu_atoms_submitted_any(struct kbase_device *kbdev) return false; } -int kbase_backend_nr_atoms_submitted(struct kbase_device *kbdev, int js) +int kbase_backend_nr_atoms_submitted(struct kbase_device *kbdev, unsigned int js) { int nr = 0; int i; @@ -178,7 +176,7 @@ int kbase_backend_nr_atoms_submitted(struct kbase_device *kbdev, int js) return nr; } -int kbase_backend_nr_atoms_on_slot(struct kbase_device *kbdev, int js) +int kbase_backend_nr_atoms_on_slot(struct kbase_device *kbdev, unsigned int js) { int nr = 0; int i; @@ -193,8 +191,8 @@ int kbase_backend_nr_atoms_on_slot(struct kbase_device *kbdev, int js) return nr; } -static int kbase_gpu_nr_atoms_on_slot_min(struct kbase_device *kbdev, int js, - enum 
kbase_atom_gpu_rb_state min_rb_state) +static int kbase_gpu_nr_atoms_on_slot_min(struct kbase_device *kbdev, unsigned int js, + enum kbase_atom_gpu_rb_state min_rb_state) { int nr = 0; int i; @@ -244,9 +242,11 @@ static bool check_secure_atom(struct kbase_jd_atom *katom, bool secure) static bool kbase_gpu_check_secure_atoms(struct kbase_device *kbdev, bool secure) { - int js, i; + unsigned int js; for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) { + int i; + for (i = 0; i < SLOT_RB_SIZE; i++) { struct kbase_jd_atom *katom = kbase_gpu_inspect(kbdev, js, i); @@ -261,7 +261,7 @@ static bool kbase_gpu_check_secure_atoms(struct kbase_device *kbdev, return false; } -int kbase_backend_slot_free(struct kbase_device *kbdev, int js) +int kbase_backend_slot_free(struct kbase_device *kbdev, unsigned int js) { lockdep_assert_held(&kbdev->hwaccess_lock); @@ -430,9 +430,9 @@ static void kbase_gpu_mark_atom_for_return(struct kbase_device *kbdev, * * Return: true if any slots other than @js are busy, false otherwise */ -static inline bool other_slots_busy(struct kbase_device *kbdev, int js) +static inline bool other_slots_busy(struct kbase_device *kbdev, unsigned int js) { - int slot; + unsigned int slot; for (slot = 0; slot < kbdev->gpu_props.num_job_slots; slot++) { if (slot == js) @@ -844,7 +844,7 @@ static int kbase_jm_exit_protected_mode(struct kbase_device *kbdev, void kbase_backend_slot_update(struct kbase_device *kbdev) { - int js; + unsigned int js; lockdep_assert_held(&kbdev->hwaccess_lock); @@ -1013,25 +1013,25 @@ void kbase_backend_slot_update(struct kbase_device *kbdev) kbase_pm_request_gpu_cycle_counter_l2_is_on( kbdev); - if (!kbase_job_hw_submit(kbdev, katom[idx], js)) + if (!kbase_job_hw_submit(kbdev, katom[idx], js)) { katom[idx]->gpu_rb_state = KBASE_ATOM_GPU_RB_SUBMITTED; + + /* Inform power management at start/finish of + * atom so it can update its GPU utilisation + * metrics. 
+ */ + kbase_pm_metrics_update(kbdev, + &katom[idx]->start_timestamp); + + /* Inform platform at start/finish of atom */ + kbasep_platform_event_atom_submit(katom[idx]); + } else break; /* ***TRANSITION TO HIGHER STATE*** */ fallthrough; case KBASE_ATOM_GPU_RB_SUBMITTED: - - /* Inform power management at start/finish of - * atom so it can update its GPU utilisation - * metrics. - */ - kbase_pm_metrics_update(kbdev, - &katom[idx]->start_timestamp); - - /* Inform platform at start/finish of atom */ - kbasep_platform_event_atom_submit(katom[idx]); - break; case KBASE_ATOM_GPU_RB_RETURN_TO_JS: @@ -1111,8 +1111,7 @@ kbase_rb_atom_might_depend(const struct kbase_jd_atom *katom_a, * * Return: true if an atom was evicted, false otherwise. */ -bool kbase_gpu_irq_evict(struct kbase_device *kbdev, int js, - u32 completion_code) +bool kbase_gpu_irq_evict(struct kbase_device *kbdev, unsigned int js, u32 completion_code) { struct kbase_jd_atom *katom; struct kbase_jd_atom *next_katom; @@ -1120,6 +1119,10 @@ bool kbase_gpu_irq_evict(struct kbase_device *kbdev, int js, lockdep_assert_held(&kbdev->hwaccess_lock); katom = kbase_gpu_inspect(kbdev, js, 0); + if (!katom) { + dev_err(kbdev->dev, "Can't get a katom from js(%u)\n", js); + return false; + } next_katom = kbase_gpu_inspect(kbdev, js, 1); if (next_katom && @@ -1184,13 +1187,18 @@ bool kbase_gpu_irq_evict(struct kbase_device *kbdev, int js, * on the HW and returned to the JS. 
*/ -void kbase_gpu_complete_hw(struct kbase_device *kbdev, int js, - u32 completion_code, - u64 job_tail, - ktime_t *end_timestamp) +void kbase_gpu_complete_hw(struct kbase_device *kbdev, unsigned int js, u32 completion_code, + u64 job_tail, ktime_t *end_timestamp) { struct kbase_jd_atom *katom = kbase_gpu_inspect(kbdev, js, 0); - struct kbase_context *kctx = katom->kctx; + struct kbase_context *kctx = NULL; + + if (unlikely(!katom)) { + dev_err(kbdev->dev, "Can't get a katom from js(%d)\n", js); + return; + } + + kctx = katom->kctx; dev_dbg(kbdev->dev, "Atom %pK completed on hw with code 0x%x and job_tail 0x%llx (s:%d)\n", @@ -1243,7 +1251,7 @@ void kbase_gpu_complete_hw(struct kbase_device *kbdev, int js, } } else if (completion_code != BASE_JD_EVENT_DONE) { struct kbasep_js_device_data *js_devdata = &kbdev->js_data; - int i; + unsigned int i; if (!kbase_ctx_flag(katom->kctx, KCTX_DYING)) { dev_warn(kbdev->dev, "error detected from slot %d, job status 0x%08x (%s)", @@ -1388,7 +1396,7 @@ void kbase_gpu_complete_hw(struct kbase_device *kbdev, int js, void kbase_backend_reset(struct kbase_device *kbdev, ktime_t *end_timestamp) { - int js; + unsigned int js; lockdep_assert_held(&kbdev->hwaccess_lock); @@ -1416,7 +1424,7 @@ void kbase_backend_reset(struct kbase_device *kbdev, ktime_t *end_timestamp) kbase_gpu_in_protected_mode(kbdev)); WARN(!(kbase_jd_katom_is_protected(katom) && js == 0) && kbase_jd_katom_is_protected(katom), - "Protected atom on JS%d not supported", js); + "Protected atom on JS%u not supported", js); } if ((katom->gpu_rb_state < KBASE_ATOM_GPU_RB_SUBMITTED) && !kbase_ctx_flag(katom->kctx, KCTX_DYING)) @@ -1512,10 +1520,8 @@ static bool should_stop_next_atom(struct kbase_device *kbdev, return ret; } -static inline void kbase_gpu_stop_atom(struct kbase_device *kbdev, - int js, - struct kbase_jd_atom *katom, - u32 action) +static inline void kbase_gpu_stop_atom(struct kbase_device *kbdev, unsigned int js, + struct kbase_jd_atom *katom, u32 action) { 
struct kbase_context *kctx = katom->kctx; u32 hw_action = action & JS_COMMAND_MASK; @@ -1559,11 +1565,8 @@ static int should_stop_x_dep_slot(struct kbase_jd_atom *katom) return -1; } -bool kbase_backend_soft_hard_stop_slot(struct kbase_device *kbdev, - struct kbase_context *kctx, - int js, - struct kbase_jd_atom *katom, - u32 action) +bool kbase_backend_soft_hard_stop_slot(struct kbase_device *kbdev, struct kbase_context *kctx, + unsigned int js, struct kbase_jd_atom *katom, u32 action) { struct kbase_jd_atom *katom_idx0; struct kbase_context *kctx_idx0 = NULL; @@ -1816,7 +1819,7 @@ void kbase_backend_complete_wq_post_sched(struct kbase_device *kbdev, void kbase_gpu_dump_slots(struct kbase_device *kbdev) { unsigned long flags; - int js; + unsigned int js; spin_lock_irqsave(&kbdev->hwaccess_lock, flags); @@ -1831,12 +1834,10 @@ void kbase_gpu_dump_slots(struct kbase_device *kbdev) idx); if (katom) - dev_info(kbdev->dev, - " js%d idx%d : katom=%pK gpu_rb_state=%d\n", - js, idx, katom, katom->gpu_rb_state); + dev_info(kbdev->dev, " js%u idx%d : katom=%pK gpu_rb_state=%d\n", + js, idx, katom, katom->gpu_rb_state); else - dev_info(kbdev->dev, " js%d idx%d : empty\n", - js, idx); + dev_info(kbdev->dev, " js%u idx%d : empty\n", js, idx); } } @@ -1845,7 +1846,7 @@ void kbase_gpu_dump_slots(struct kbase_device *kbdev) void kbase_backend_slot_kctx_purge_locked(struct kbase_device *kbdev, struct kbase_context *kctx) { - int js; + unsigned int js; bool tracked = false; lockdep_assert_held(&kbdev->hwaccess_lock); diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_jm_rb.h b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_jm_rb.h index d3ff203762f9..32be0bf44655 100644 --- a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_jm_rb.h +++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_jm_rb.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2014-2018, 2020-2021 ARM Limited. All rights reserved. 
+ * (C) COPYRIGHT 2014-2018, 2020-2022 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -40,8 +40,7 @@ * * Return: true if job evicted from NEXT registers, false otherwise */ -bool kbase_gpu_irq_evict(struct kbase_device *kbdev, int js, - u32 completion_code); +bool kbase_gpu_irq_evict(struct kbase_device *kbdev, unsigned int js, u32 completion_code); /** * kbase_gpu_complete_hw - Complete an atom on job slot js @@ -53,10 +52,8 @@ bool kbase_gpu_irq_evict(struct kbase_device *kbdev, int js, * completed * @end_timestamp: Time of completion */ -void kbase_gpu_complete_hw(struct kbase_device *kbdev, int js, - u32 completion_code, - u64 job_tail, - ktime_t *end_timestamp); +void kbase_gpu_complete_hw(struct kbase_device *kbdev, unsigned int js, u32 completion_code, + u64 job_tail, ktime_t *end_timestamp); /** * kbase_gpu_inspect - Inspect the contents of the HW access ringbuffer @@ -68,8 +65,7 @@ void kbase_gpu_complete_hw(struct kbase_device *kbdev, int js, * Return: The atom at that position in the ringbuffer * or NULL if no atom present */ -struct kbase_jd_atom *kbase_gpu_inspect(struct kbase_device *kbdev, int js, - int idx); +struct kbase_jd_atom *kbase_gpu_inspect(struct kbase_device *kbdev, unsigned int js, int idx); /** * kbase_gpu_dump_slots - Print the contents of the slot ringbuffers diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_js_backend.c b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_js_backend.c index a2f824da5e04..cbc88f91a400 100644 --- a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_js_backend.c +++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_js_backend.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2014-2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2022 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -91,7 +91,7 @@ static enum hrtimer_restart timer_callback(struct hrtimer *timer) struct kbase_device *kbdev; struct kbasep_js_device_data *js_devdata; struct kbase_backend_data *backend; - int s; + unsigned int s; bool reset_needed = false; KBASE_DEBUG_ASSERT(timer != NULL); @@ -375,4 +375,3 @@ void kbase_backend_timeouts_changed(struct kbase_device *kbdev) backend->timeouts_updated = true; } - diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_model_dummy.c b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_model_dummy.c index 1a0209f702ac..19c345341ea9 100644 --- a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_model_dummy.c +++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_model_dummy.c @@ -62,8 +62,9 @@ * document */ #include +#include #include -#include +#include #include #if MALI_USE_CSF @@ -319,7 +320,7 @@ static const struct control_reg_values_t all_control_reg_values[] = { .mmu_features = 0x2830, .gpu_features_lo = 0, .gpu_features_hi = 0, - .shader_present = DUMMY_IMPLEMENTATION_SHADER_PRESENT, + .shader_present = DUMMY_IMPLEMENTATION_SHADER_PRESENT_TBEX, .stack_present = DUMMY_IMPLEMENTATION_STACK_PRESENT, }, { @@ -337,21 +338,6 @@ static const struct control_reg_values_t all_control_reg_values[] = { .shader_present = DUMMY_IMPLEMENTATION_SHADER_PRESENT, .stack_present = DUMMY_IMPLEMENTATION_STACK_PRESENT, }, - { - .name = "tDUx", - .gpu_id = GPU_ID2_MAKE(10, 2, 0, 1, 0, 0, 0), - .as_present = 0xFF, - .thread_max_threads = 0x180, - .thread_max_workgroup_size = 0x180, - .thread_max_barrier_size = 0x180, - .thread_features = THREAD_FEATURES_PARTIAL(0x6000, 4, 0), - .tiler_features = 0x809, - .mmu_features = 0x2830, - .gpu_features_lo = 0, - .gpu_features_hi = 0, - .shader_present = DUMMY_IMPLEMENTATION_SHADER_PRESENT, - .stack_present = DUMMY_IMPLEMENTATION_STACK_PRESENT, - }, { .name = 
"tODx", .gpu_id = GPU_ID2_MAKE(10, 8, 0, 2, 0, 0, 0), @@ -364,7 +350,7 @@ static const struct control_reg_values_t all_control_reg_values[] = { .mmu_features = 0x2830, .gpu_features_lo = 0, .gpu_features_hi = 0, - .shader_present = DUMMY_IMPLEMENTATION_SHADER_PRESENT, + .shader_present = DUMMY_IMPLEMENTATION_SHADER_PRESENT_TODX, .stack_present = DUMMY_IMPLEMENTATION_STACK_PRESENT, }, { @@ -412,7 +398,7 @@ static const struct control_reg_values_t all_control_reg_values[] = { .mmu_features = 0x2830, .gpu_features_lo = 0xf, .gpu_features_hi = 0, - .shader_present = 0xFF, + .shader_present = DUMMY_IMPLEMENTATION_SHADER_PRESENT_TTUX, .stack_present = 0xF, }, { @@ -428,7 +414,7 @@ static const struct control_reg_values_t all_control_reg_values[] = { .mmu_features = 0x2830, .gpu_features_lo = 0xf, .gpu_features_hi = 0, - .shader_present = 0xFF, + .shader_present = DUMMY_IMPLEMENTATION_SHADER_PRESENT_TTIX, .stack_present = 0xF, }, }; @@ -530,17 +516,18 @@ static u32 gpu_model_get_prfcnt_value(enum kbase_ipa_core_type core_type, (ipa_ctl_select_config[core_type] >> (cnt_idx * 8)) & 0xFF; /* Currently only primary counter blocks are supported */ - if (WARN_ON(event_index >= 64)) + if (WARN_ON(event_index >= + (KBASE_DUMMY_MODEL_COUNTER_HEADER_DWORDS + KBASE_DUMMY_MODEL_COUNTER_PER_CORE))) return 0; /* The actual events start index 4 onwards. Spec also says PRFCNT_EN, * TIMESTAMP_LO or TIMESTAMP_HI pseudo-counters do not make sense for * IPA counters. If selected, the value returned for them will be zero. 
*/ - if (WARN_ON(event_index <= 3)) + if (WARN_ON(event_index < KBASE_DUMMY_MODEL_COUNTER_HEADER_DWORDS)) return 0; - event_index -= 4; + event_index -= KBASE_DUMMY_MODEL_COUNTER_HEADER_DWORDS; spin_lock_irqsave(&performance_counters.access_lock, flags); @@ -736,7 +723,7 @@ void gpu_model_glb_request_job_irq(void *model) spin_lock_irqsave(&hw_error_status.access_lock, flags); hw_error_status.job_irq_status |= JOB_IRQ_GLOBAL_IF; spin_unlock_irqrestore(&hw_error_status.access_lock, flags); - gpu_device_raise_irq(model, GPU_DUMMY_JOB_IRQ); + gpu_device_raise_irq(model, MODEL_LINUX_JOB_IRQ); } #endif /* !MALI_USE_CSF */ @@ -768,7 +755,7 @@ static void init_register_statuses(struct dummy_model_t *dummy) performance_counters.time = 0; } -static void update_register_statuses(struct dummy_model_t *dummy, int job_slot) +static void update_register_statuses(struct dummy_model_t *dummy, unsigned int job_slot) { lockdep_assert_held(&hw_error_status.access_lock); @@ -1101,7 +1088,7 @@ static const struct control_reg_values_t *find_control_reg_values(const char *gp return ret; } -void *midgard_model_create(const void *config) +void *midgard_model_create(struct kbase_device *kbdev) { struct dummy_model_t *dummy = NULL; @@ -1118,7 +1105,12 @@ void *midgard_model_create(const void *config) GPU_CONTROL_REG(L2_PRESENT_LO), dummy->control_reg_values); performance_counters.shader_present = get_implementation_register( GPU_CONTROL_REG(SHADER_PRESENT_LO), dummy->control_reg_values); + + gpu_device_set_data(dummy, kbdev); + + dev_info(kbdev->dev, "Using Dummy Model"); } + return dummy; } @@ -1134,7 +1126,7 @@ static void midgard_model_get_outputs(void *h) lockdep_assert_held(&hw_error_status.access_lock); if (hw_error_status.job_irq_status) - gpu_device_raise_irq(dummy, GPU_DUMMY_JOB_IRQ); + gpu_device_raise_irq(dummy, MODEL_LINUX_JOB_IRQ); if ((dummy->power_changed && dummy->power_changed_mask) || (dummy->reset_completed & dummy->reset_completed_mask) || @@ -1145,10 +1137,10 @@ static 
void midgard_model_get_outputs(void *h) (dummy->flush_pa_range_completed && dummy->flush_pa_range_completed_irq_enabled) || #endif (dummy->clean_caches_completed && dummy->clean_caches_completed_irq_enabled)) - gpu_device_raise_irq(dummy, GPU_DUMMY_GPU_IRQ); + gpu_device_raise_irq(dummy, MODEL_LINUX_GPU_IRQ); if (hw_error_status.mmu_irq_rawstat & hw_error_status.mmu_irq_mask) - gpu_device_raise_irq(dummy, GPU_DUMMY_MMU_IRQ); + gpu_device_raise_irq(dummy, MODEL_LINUX_MMU_IRQ); } static void midgard_model_update(void *h) @@ -1215,7 +1207,7 @@ static void invalidate_active_jobs(struct dummy_model_t *dummy) } } -u8 midgard_model_write_reg(void *h, u32 addr, u32 value) +void midgard_model_write_reg(void *h, u32 addr, u32 value) { unsigned long flags; struct dummy_model_t *dummy = (struct dummy_model_t *)h; @@ -1225,7 +1217,7 @@ u8 midgard_model_write_reg(void *h, u32 addr, u32 value) #if !MALI_USE_CSF if ((addr >= JOB_CONTROL_REG(JOB_SLOT0)) && (addr < (JOB_CONTROL_REG(JOB_SLOT15) + 0x80))) { - int slot_idx = (addr >> 7) & 0xf; + unsigned int slot_idx = (addr >> 7) & 0xf; KBASE_DEBUG_ASSERT(slot_idx < NUM_SLOTS); if (addr == JOB_SLOT_REG(slot_idx, JS_HEAD_NEXT_LO)) { @@ -1607,11 +1599,9 @@ u8 midgard_model_write_reg(void *h, u32 addr, u32 value) midgard_model_update(dummy); midgard_model_get_outputs(dummy); spin_unlock_irqrestore(&hw_error_status.access_lock, flags); - - return 1; } -u8 midgard_model_read_reg(void *h, u32 addr, u32 * const value) +void midgard_model_read_reg(void *h, u32 addr, u32 *const value) { unsigned long flags; struct dummy_model_t *dummy = (struct dummy_model_t *)h; @@ -2051,8 +2041,6 @@ u8 midgard_model_read_reg(void *h, u32 addr, u32 * const value) spin_unlock_irqrestore(&hw_error_status.access_lock, flags); CSTD_UNUSED(dummy); - - return 1; } static u32 set_user_sample_core_type(u64 *counters, u32 *usr_data_start, u32 usr_data_offset, @@ -2228,3 +2216,16 @@ int gpu_model_control(void *model, return 0; } + +/** + * kbase_is_gpu_removed - Has 
the GPU been removed. + * @kbdev: Kbase device pointer + * + * This function would return true if the GPU has been removed. + * It is stubbed here + * Return: Always false + */ +bool kbase_is_gpu_removed(struct kbase_device *kbdev) +{ + return false; +} diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_model_dummy.h b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_model_dummy.h index 7d370de9f601..84842291c0f7 100644 --- a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_model_dummy.h +++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_model_dummy.h @@ -21,11 +21,24 @@ /* * Dummy Model interface + * + * Support for NO_MALI dummy Model interface. + * + * +-----------------------------------+ + * | Kbase read/write/IRQ | + * +-----------------------------------+ + * | Model Linux Framework | + * +-----------------------------------+ + * | Model Dummy interface definitions | + * +-----------------+-----------------+ + * | Fake R/W | Fake IRQ | + * +-----------------+-----------------+ */ #ifndef _KBASE_MODEL_DUMMY_H_ #define _KBASE_MODEL_DUMMY_H_ +#include #include #define model_error_log(module, ...) 
pr_err(__VA_ARGS__) @@ -154,11 +167,6 @@ struct gpu_model_prfcnt_en { u32 shader; }; -void *midgard_model_create(const void *config); -void midgard_model_destroy(void *h); -u8 midgard_model_write_reg(void *h, u32 addr, u32 value); -u8 midgard_model_read_reg(void *h, u32 addr, - u32 * const value); void midgard_set_error(int job_slot); int job_atom_inject_error(struct kbase_error_params *params); int gpu_model_control(void *h, @@ -211,17 +219,6 @@ void gpu_model_prfcnt_dump_request(uint32_t *sample_buf, struct gpu_model_prfcnt void gpu_model_glb_request_job_irq(void *model); #endif /* MALI_USE_CSF */ -enum gpu_dummy_irq { - GPU_DUMMY_JOB_IRQ, - GPU_DUMMY_GPU_IRQ, - GPU_DUMMY_MMU_IRQ -}; - -void gpu_device_raise_irq(void *model, - enum gpu_dummy_irq irq); -void gpu_device_set_data(void *model, void *data); -void *gpu_device_get_data(void *model); - extern struct error_status_t hw_error_status; #endif diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_model_error_generator.c b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_model_error_generator.c index 972d1c87fb1a..75b1e7e656c0 100644 --- a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_model_error_generator.c +++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_model_error_generator.c @@ -21,7 +21,7 @@ #include #include -#include "backend/gpu/mali_kbase_model_dummy.h" +#include "backend/gpu/mali_kbase_model_linux.h" static struct kbase_error_atom *error_track_list; diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_model_linux.c b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_model_linux.c index 7887cb240d43..b37680ddb29b 100644 --- a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_model_linux.c +++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_model_linux.c @@ -20,12 +20,12 @@ */ /* - * Model interface + * Model Linux Framework interfaces. 
*/ #include #include -#include + #include "backend/gpu/mali_kbase_model_linux.h" #include "device/mali_kbase_device.h" #include "mali_kbase_irq_internal.h" @@ -105,8 +105,7 @@ static void serve_mmu_irq(struct work_struct *work) kmem_cache_free(kbdev->irq_slab, data); } -void gpu_device_raise_irq(void *model, - enum gpu_dummy_irq irq) +void gpu_device_raise_irq(void *model, enum model_linux_irqs irq) { struct model_irq_data *data; struct kbase_device *kbdev = gpu_device_get_data(model); @@ -120,15 +119,15 @@ void gpu_device_raise_irq(void *model, data->kbdev = kbdev; switch (irq) { - case GPU_DUMMY_JOB_IRQ: + case MODEL_LINUX_JOB_IRQ: INIT_WORK(&data->work, serve_job_irq); atomic_set(&kbdev->serving_job_irq, 1); break; - case GPU_DUMMY_GPU_IRQ: + case MODEL_LINUX_GPU_IRQ: INIT_WORK(&data->work, serve_gpu_irq); atomic_set(&kbdev->serving_gpu_irq, 1); break; - case GPU_DUMMY_MMU_IRQ: + case MODEL_LINUX_MMU_IRQ: INIT_WORK(&data->work, serve_mmu_irq); atomic_set(&kbdev->serving_mmu_irq, 1); break; @@ -165,22 +164,8 @@ u32 kbase_reg_read(struct kbase_device *kbdev, u32 offset) return val; } - KBASE_EXPORT_TEST_API(kbase_reg_read); -/** - * kbase_is_gpu_removed - Has the GPU been removed. - * @kbdev: Kbase device pointer - * - * This function would return true if the GPU has been removed. 
- * It is stubbed here - * Return: Always false - */ -bool kbase_is_gpu_removed(struct kbase_device *kbdev) -{ - return false; -} - int kbase_install_interrupts(struct kbase_device *kbdev) { KBASE_DEBUG_ASSERT(kbdev); @@ -239,16 +224,12 @@ KBASE_EXPORT_TEST_API(kbase_gpu_irq_test_handler); int kbase_gpu_device_create(struct kbase_device *kbdev) { - kbdev->model = midgard_model_create(NULL); + kbdev->model = midgard_model_create(kbdev); if (kbdev->model == NULL) return -ENOMEM; - gpu_device_set_data(kbdev->model, kbdev); - spin_lock_init(&kbdev->reg_op_lock); - dev_warn(kbdev->dev, "Using Dummy Model"); - return 0; } diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_model_linux.h b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_model_linux.h index dcb2e7cf7c70..a1c480eaf49d 100644 --- a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_model_linux.h +++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_model_linux.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -20,13 +20,132 @@ */ /* - * Model interface + * Model Linux Framework interfaces. + * + * This framework is used to provide generic Kbase Models interfaces. + * Note: Backends cannot be used together; the selection is done at build time. 
+ * + * - Without Model Linux Framework: + * +-----------------------------+ + * | Kbase read/write/IRQ | + * +-----------------------------+ + * | HW interface definitions | + * +-----------------------------+ + * + * - With Model Linux Framework: + * +-----------------------------+ + * | Kbase read/write/IRQ | + * +-----------------------------+ + * | Model Linux Framework | + * +-----------------------------+ + * | Model interface definitions | + * +-----------------------------+ */ #ifndef _KBASE_MODEL_LINUX_H_ #define _KBASE_MODEL_LINUX_H_ +/* + * Include Model definitions + */ + +#if IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI) +#include +#endif /* IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI) */ + +#if !IS_ENABLED(CONFIG_MALI_REAL_HW) +/** + * kbase_gpu_device_create() - Generic create function. + * + * @kbdev: Kbase device. + * + * Specific model hook is implemented by midgard_model_create() + * + * Return: 0 on success, error code otherwise. + */ int kbase_gpu_device_create(struct kbase_device *kbdev); + +/** + * kbase_gpu_device_destroy() - Generic destroy function. + * + * @kbdev: Kbase device. + * + * Specific model hook is implemented by midgard_model_destroy() + */ void kbase_gpu_device_destroy(struct kbase_device *kbdev); -#endif /* _KBASE_MODEL_LINUX_H_ */ +/** + * midgard_model_create() - Private create function. + * + * @kbdev: Kbase device. + * + * This hook is specific to the model built in Kbase. + * + * Return: Model handle. + */ +void *midgard_model_create(struct kbase_device *kbdev); + +/** + * midgard_model_destroy() - Private destroy function. + * + * @h: Model handle. + * + * This hook is specific to the model built in Kbase. + */ +void midgard_model_destroy(void *h); + +/** + * midgard_model_write_reg() - Private model write function. + * + * @h: Model handle. + * @addr: Address at which to write. + * @value: value to write. + * + * This hook is specific to the model built in Kbase.
+ */ +void midgard_model_write_reg(void *h, u32 addr, u32 value); + +/** + * midgard_model_read_reg() - Private model read function. + * + * @h: Model handle. + * @addr: Address from which to read. + * @value: Pointer where to store the read value. + * + * This hook is specific to the model built in Kbase. + */ +void midgard_model_read_reg(void *h, u32 addr, u32 *const value); + +/** + * gpu_device_raise_irq() - Private IRQ raise function. + * + * @model: Model handle. + * @irq: IRQ type to raise. + * + * This hook is global to the model Linux framework. + */ +void gpu_device_raise_irq(void *model, enum model_linux_irqs irq); + +/** + * gpu_device_set_data() - Private model set data function. + * + * @model: Model handle. + * @data: Data carried by model. + * + * This hook is global to the model Linux framework. + */ +void gpu_device_set_data(void *model, void *data); + +/** + * gpu_device_get_data() - Private model get data function. + * + * @model: Model handle. + * + * This hook is global to the model Linux framework. + * + * Return: Pointer to the data carried by model. 
+ */ +void *gpu_device_get_data(void *model); +#endif /* !IS_ENABLED(CONFIG_MALI_REAL_HW) */ + +#endif /* _KBASE_MODEL_LINUX_H_ */ diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_ca.c b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_ca.c index 9e38b904b459..e2b0a919282e 100644 --- a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_ca.c +++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_ca.c @@ -26,9 +26,7 @@ #include #include #include -#if IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI) -#include -#endif /* CONFIG_MALI_BIFROST_NO_MALI */ +#include #include int kbase_pm_ca_init(struct kbase_device *kbdev) diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_driver.c b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_driver.c index 8173cf6ba7d7..d86a388c64fb 100644 --- a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_driver.c +++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_driver.c @@ -804,6 +804,17 @@ static int kbase_pm_mcu_update_state(struct kbase_device *kbdev) KBASE_MCU_HCTL_SHADERS_PEND_ON; } else backend->mcu_state = KBASE_MCU_ON_HWCNT_ENABLE; +#if IS_ENABLED(CONFIG_MALI_CORESIGHT) + if (kbase_debug_coresight_csf_state_check( + kbdev, KBASE_DEBUG_CORESIGHT_CSF_DISABLED)) { + kbase_debug_coresight_csf_state_request( + kbdev, KBASE_DEBUG_CORESIGHT_CSF_ENABLED); + backend->mcu_state = KBASE_MCU_CORESIGHT_ENABLE; + } else if (kbase_debug_coresight_csf_state_check( + kbdev, KBASE_DEBUG_CORESIGHT_CSF_ENABLED)) { + backend->mcu_state = KBASE_MCU_CORESIGHT_ENABLE; + } +#endif /* IS_ENABLED(CONFIG_MALI_CORESIGHT) */ } break; @@ -832,8 +843,7 @@ static int kbase_pm_mcu_update_state(struct kbase_device *kbdev) unsigned long flags; kbase_csf_scheduler_spin_lock(kbdev, &flags); - kbase_hwcnt_context_enable( - kbdev->hwcnt_gpu_ctx); + kbase_hwcnt_context_enable(kbdev->hwcnt_gpu_ctx); kbase_csf_scheduler_spin_unlock(kbdev, flags); backend->hwcnt_disabled = false; } @@ -854,9 +864,19 @@ static int kbase_pm_mcu_update_state(struct 
kbase_device *kbdev) backend->mcu_state = KBASE_MCU_HCTL_MCU_ON_RECHECK; } - } else if (kbase_pm_handle_mcu_core_attr_update(kbdev)) { + } else if (kbase_pm_handle_mcu_core_attr_update(kbdev)) backend->mcu_state = KBASE_MCU_ON_CORE_ATTR_UPDATE_PEND; +#if IS_ENABLED(CONFIG_MALI_CORESIGHT) + else if (kbdev->csf.coresight.disable_on_pmode_enter) { + kbase_debug_coresight_csf_state_request( + kbdev, KBASE_DEBUG_CORESIGHT_CSF_DISABLED); + backend->mcu_state = KBASE_MCU_ON_PMODE_ENTER_CORESIGHT_DISABLE; + } else if (kbdev->csf.coresight.enable_on_pmode_exit) { + kbase_debug_coresight_csf_state_request( + kbdev, KBASE_DEBUG_CORESIGHT_CSF_ENABLED); + backend->mcu_state = KBASE_MCU_ON_PMODE_EXIT_CORESIGHT_ENABLE; } +#endif break; case KBASE_MCU_HCTL_MCU_ON_RECHECK: @@ -947,12 +967,46 @@ static int kbase_pm_mcu_update_state(struct kbase_device *kbdev) #ifdef KBASE_PM_RUNTIME if (backend->gpu_sleep_mode_active) backend->mcu_state = KBASE_MCU_ON_SLEEP_INITIATE; - else + else { #endif backend->mcu_state = KBASE_MCU_ON_HALT; +#if IS_ENABLED(CONFIG_MALI_CORESIGHT) + kbase_debug_coresight_csf_state_request( + kbdev, KBASE_DEBUG_CORESIGHT_CSF_DISABLED); + backend->mcu_state = KBASE_MCU_CORESIGHT_DISABLE; +#endif /* IS_ENABLED(CONFIG_MALI_CORESIGHT) */ + } } break; +#if IS_ENABLED(CONFIG_MALI_CORESIGHT) + case KBASE_MCU_ON_PMODE_ENTER_CORESIGHT_DISABLE: + if (kbase_debug_coresight_csf_state_check( + kbdev, KBASE_DEBUG_CORESIGHT_CSF_DISABLED)) { + backend->mcu_state = KBASE_MCU_ON; + kbdev->csf.coresight.disable_on_pmode_enter = false; + } + break; + case KBASE_MCU_ON_PMODE_EXIT_CORESIGHT_ENABLE: + if (kbase_debug_coresight_csf_state_check( + kbdev, KBASE_DEBUG_CORESIGHT_CSF_ENABLED)) { + backend->mcu_state = KBASE_MCU_ON; + kbdev->csf.coresight.enable_on_pmode_exit = false; + } + break; + case KBASE_MCU_CORESIGHT_DISABLE: + if (kbase_debug_coresight_csf_state_check( + kbdev, KBASE_DEBUG_CORESIGHT_CSF_DISABLED)) + backend->mcu_state = KBASE_MCU_ON_HALT; + break; + + case 
KBASE_MCU_CORESIGHT_ENABLE: + if (kbase_debug_coresight_csf_state_check( + kbdev, KBASE_DEBUG_CORESIGHT_CSF_ENABLED)) + backend->mcu_state = KBASE_MCU_ON_HWCNT_ENABLE; + break; +#endif /* IS_ENABLED(CONFIG_MALI_CORESIGHT) */ + case KBASE_MCU_ON_HALT: if (!kbase_pm_is_mcu_desired(kbdev)) { kbase_csf_firmware_trigger_mcu_halt(kbdev); @@ -1045,6 +1099,11 @@ static int kbase_pm_mcu_update_state(struct kbase_device *kbdev) /* Reset complete */ if (!backend->in_reset) backend->mcu_state = KBASE_MCU_OFF; + +#if IS_ENABLED(CONFIG_MALI_CORESIGHT) + kbdev->csf.coresight.disable_on_pmode_enter = false; + kbdev->csf.coresight.enable_on_pmode_exit = false; +#endif /* IS_ENABLED(CONFIG_MALI_CORESIGHT) */ break; default: @@ -1142,13 +1201,22 @@ static bool can_power_down_l2(struct kbase_device *kbdev) #if MALI_USE_CSF /* Due to the HW issue GPU2019-3878, need to prevent L2 power off * whilst MMU command is in progress. + * Also defer the power-down if MMU is in process of page migration. */ - return !kbdev->mmu_hw_operation_in_progress; + return !kbdev->mmu_hw_operation_in_progress && !kbdev->mmu_page_migrate_in_progress; #else - return true; + return !kbdev->mmu_page_migrate_in_progress; #endif } +static bool can_power_up_l2(struct kbase_device *kbdev) +{ + lockdep_assert_held(&kbdev->hwaccess_lock); + + /* Avoiding l2 transition if MMU is undergoing page migration */ + return !kbdev->mmu_page_migrate_in_progress; +} + static bool need_tiler_control(struct kbase_device *kbdev) { #if MALI_USE_CSF @@ -1220,7 +1288,7 @@ static int kbase_pm_l2_update_state(struct kbase_device *kbdev) switch (backend->l2_state) { case KBASE_L2_OFF: - if (kbase_pm_is_l2_desired(kbdev)) { + if (kbase_pm_is_l2_desired(kbdev) && can_power_up_l2(kbdev)) { #if MALI_USE_CSF && defined(KBASE_PM_RUNTIME) /* Enable HW timer of IPA control before * L2 cache is powered-up. 
diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_internal.h b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_internal.h index 115cd3c34d90..e66ce57d3120 100644 --- a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_internal.h +++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_internal.h @@ -995,4 +995,27 @@ static inline void kbase_pm_disable_db_mirror_interrupt(struct kbase_device *kbd } #endif +/** + * kbase_pm_l2_allow_mmu_page_migration - L2 state allows MMU page migration or not + * + * @kbdev: The kbase device structure for the device (must be a valid pointer) + * + * Check whether the L2 state is in power transition phase or not. If it is, the MMU + * page migration should be deferred. The caller must hold hwaccess_lock, and, if MMU + * page migration is intended, immediately start the MMU migration action without + * dropping the lock. When page migration begins, a flag is set in kbdev that would + * prevent the L2 state machine traversing into power transition phases, until + * the MMU migration action ends. + * + * Return: true if MMU page migration is allowed + */ +static inline bool kbase_pm_l2_allow_mmu_page_migration(struct kbase_device *kbdev) +{ + struct kbase_pm_backend_data *backend = &kbdev->pm.backend; + + lockdep_assert_held(&kbdev->hwaccess_lock); + + return (backend->l2_state != KBASE_L2_PEND_ON && backend->l2_state != KBASE_L2_PEND_OFF); +} + #endif /* _KBASE_BACKEND_PM_INTERNAL_H_ */ diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_mcu_states.h b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_mcu_states.h index 5e57c9d0c0e1..3b448e397e72 100644 --- a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_mcu_states.h +++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_mcu_states.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2020-2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2020-2022 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -66,6 +66,13 @@ * is being put to sleep. * @ON_PEND_SLEEP: MCU sleep is in progress. * @IN_SLEEP: Sleep request is completed and MCU has halted. + * @ON_PMODE_ENTER_CORESIGHT_DISABLE: The MCU is on, protected mode enter is about to + * be requested, Coresight is being disabled. + * @ON_PMODE_EXIT_CORESIGHT_ENABLE : The MCU is on, protected mode exit has happened + * Coresight is being enabled. + * @CORESIGHT_DISABLE: The MCU is on and Coresight is being disabled. + * @CORESIGHT_ENABLE: The MCU is on, host does not have control and + * Coresight is being enabled. */ KBASEP_MCU_STATE(OFF) KBASEP_MCU_STATE(PEND_ON_RELOAD) @@ -92,3 +99,10 @@ KBASEP_MCU_STATE(HCTL_SHADERS_CORE_OFF_PEND) KBASEP_MCU_STATE(ON_SLEEP_INITIATE) KBASEP_MCU_STATE(ON_PEND_SLEEP) KBASEP_MCU_STATE(IN_SLEEP) +#if IS_ENABLED(CONFIG_MALI_CORESIGHT) +/* Additional MCU states for Coresight */ +KBASEP_MCU_STATE(ON_PMODE_ENTER_CORESIGHT_DISABLE) +KBASEP_MCU_STATE(ON_PMODE_EXIT_CORESIGHT_ENABLE) +KBASEP_MCU_STATE(CORESIGHT_DISABLE) +KBASEP_MCU_STATE(CORESIGHT_ENABLE) +#endif /* IS_ENABLED(CONFIG_MALI_CORESIGHT) */ diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_metrics.c b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_metrics.c index 29e945d00fbe..865f526f61f2 100644 --- a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_metrics.c +++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_metrics.c @@ -464,7 +464,7 @@ void kbase_pm_metrics_stop(struct kbase_device *kbdev) */ static void kbase_pm_metrics_active_calc(struct kbase_device *kbdev) { - int js; + unsigned int js; lockdep_assert_held(&kbdev->pm.backend.metrics.lock); diff --git a/drivers/gpu/arm/bifrost/build.bp b/drivers/gpu/arm/bifrost/build.bp index a17ff432398c..48c1fb44f494 100644 --- a/drivers/gpu/arm/bifrost/build.bp +++ b/drivers/gpu/arm/bifrost/build.bp @@ -28,7 +28,7 
@@ bob_defaults { defaults: [ "kernel_defaults", ], - no_mali: { + mali_no_mali: { kbuild_options: [ "CONFIG_MALI_BIFROST_NO_MALI=y", "CONFIG_MALI_NO_MALI_DEFAULT_GPU={{.gpu}}", @@ -140,6 +140,9 @@ bob_defaults { mali_fw_core_dump: { kbuild_options: ["CONFIG_MALI_FW_CORE_DUMP=y"], }, + mali_coresight: { + kbuild_options: ["CONFIG_MALI_CORESIGHT=y"], + }, kbuild_options: [ "CONFIG_MALI_PLATFORM_NAME={{.mali_platform_name}}", "MALI_CUSTOMER_RELEASE={{.release}}", diff --git a/drivers/gpu/arm/bifrost/context/backend/mali_kbase_context_csf.c b/drivers/gpu/arm/bifrost/context/backend/mali_kbase_context_csf.c index 3abc7a2a66f4..07d277b947d2 100644 --- a/drivers/gpu/arm/bifrost/context/backend/mali_kbase_context_csf.c +++ b/drivers/gpu/arm/bifrost/context/backend/mali_kbase_context_csf.c @@ -34,6 +34,7 @@ #if IS_ENABLED(CONFIG_DEBUG_FS) #include #include +#include #include #include #include @@ -50,6 +51,7 @@ void kbase_context_debugfs_init(struct kbase_context *const kctx) kbase_jit_debugfs_init(kctx); kbase_csf_queue_group_debugfs_init(kctx); kbase_csf_kcpu_debugfs_init(kctx); + kbase_csf_sync_debugfs_init(kctx); kbase_csf_tiler_heap_debugfs_init(kctx); kbase_csf_tiler_heap_total_debugfs_init(kctx); kbase_csf_cpu_queue_debugfs_init(kctx); diff --git a/drivers/gpu/arm/bifrost/context/mali_kbase_context.c b/drivers/gpu/arm/bifrost/context/mali_kbase_context.c index b6abfc44d212..792f724f16e4 100644 --- a/drivers/gpu/arm/bifrost/context/mali_kbase_context.c +++ b/drivers/gpu/arm/bifrost/context/mali_kbase_context.c @@ -129,10 +129,6 @@ int kbase_context_common_init(struct kbase_context *kctx) /* creating a context is considered a disjoint event */ kbase_disjoint_event(kctx->kbdev); - kctx->as_nr = KBASEP_AS_NR_INVALID; - - atomic_set(&kctx->refcount, 0); - spin_lock_init(&kctx->mm_update_lock); kctx->process_mm = NULL; atomic_set(&kctx->nonmapped_pages, 0); @@ -251,15 +247,8 @@ static void kbase_remove_kctx_from_process(struct kbase_context *kctx) void 
kbase_context_common_term(struct kbase_context *kctx) { - unsigned long flags; int pages; - mutex_lock(&kctx->kbdev->mmu_hw_mutex); - spin_lock_irqsave(&kctx->kbdev->hwaccess_lock, flags); - kbase_ctx_sched_remove_ctx(kctx); - spin_unlock_irqrestore(&kctx->kbdev->hwaccess_lock, flags); - mutex_unlock(&kctx->kbdev->mmu_hw_mutex); - pages = atomic_read(&kctx->used_pages); if (pages != 0) dev_warn(kctx->kbdev->dev, diff --git a/drivers/gpu/arm/bifrost/csf/Kbuild b/drivers/gpu/arm/bifrost/csf/Kbuild index 1474bdaacb0d..44217dba10c0 100644 --- a/drivers/gpu/arm/bifrost/csf/Kbuild +++ b/drivers/gpu/arm/bifrost/csf/Kbuild @@ -31,20 +31,24 @@ bifrost_kbase-y += \ csf/mali_kbase_csf_reset_gpu.o \ csf/mali_kbase_csf_csg_debugfs.o \ csf/mali_kbase_csf_kcpu_debugfs.o \ + csf/mali_kbase_csf_sync_debugfs.o \ csf/mali_kbase_csf_protected_memory.o \ csf/mali_kbase_csf_tiler_heap_debugfs.o \ csf/mali_kbase_csf_cpu_queue_debugfs.o \ csf/mali_kbase_csf_event.o \ csf/mali_kbase_csf_firmware_log.o \ - csf/mali_kbase_csf_tiler_heap_reclaim.o + csf/mali_kbase_csf_firmware_core_dump.o \ + csf/mali_kbase_csf_tiler_heap_reclaim.o \ + csf/mali_kbase_csf_mcu_shared_reg.o -bifrost_kbase-$(CONFIG_MALI_REAL_HW) += csf/mali_kbase_csf_firmware.o - -bifrost_kbase-$(CONFIG_MALI_BIFROST_NO_MALI) += csf/mali_kbase_csf_firmware_no_mali.o +ifeq ($(CONFIG_MALI_BIFROST_NO_MALI),y) +bifrost_kbase-y += csf/mali_kbase_csf_firmware_no_mali.o +else +bifrost_kbase-y += csf/mali_kbase_csf_firmware.o +endif bifrost_kbase-$(CONFIG_DEBUG_FS) += csf/mali_kbase_debug_csf_fault.o - ifeq ($(KBUILD_EXTMOD),) # in-tree -include $(src)/csf/ipa_control/Kbuild diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf.c b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf.c index b77007300c5c..5f4061b2ab62 100644 --- a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf.c +++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2018-2022 ARM Limited. 
All rights reserved. + * (C) COPYRIGHT 2018-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -34,12 +34,15 @@ #include #include #include "mali_kbase_csf_event.h" -#include +#include +#include "mali_kbase_csf_mcu_shared_reg.h" #define CS_REQ_EXCEPTION_MASK (CS_REQ_FAULT_MASK | CS_REQ_FATAL_MASK) #define CS_ACK_EXCEPTION_MASK (CS_ACK_FAULT_MASK | CS_ACK_FATAL_MASK) #define POWER_DOWN_LATEST_FLUSH_VALUE ((u32)1) +#define PROTM_ALLOC_MAX_RETRIES ((u8)5) + const u8 kbasep_csf_queue_group_priority_to_relative[BASE_QUEUE_GROUP_PRIORITY_COUNT] = { KBASE_QUEUE_GROUP_PRIORITY_HIGH, KBASE_QUEUE_GROUP_PRIORITY_MEDIUM, @@ -130,21 +133,6 @@ static int get_user_pages_mmap_handle(struct kbase_context *kctx, return 0; } -static void gpu_munmap_user_io_pages(struct kbase_context *kctx, struct kbase_va_region *reg, - struct tagged_addr *phys) -{ - size_t num_pages = 2; - - kbase_mmu_teardown_pages(kctx->kbdev, &kctx->kbdev->csf.mcu_mmu, reg->start_pfn, phys, - num_pages, MCU_AS_NR); - - WARN_ON(reg->flags & KBASE_REG_FREE); - - mutex_lock(&kctx->kbdev->csf.reg_lock); - kbase_remove_va_region(kctx->kbdev, reg); - mutex_unlock(&kctx->kbdev->csf.reg_lock); -} - static void init_user_io_pages(struct kbase_queue *queue) { u32 *input_addr = (u32 *)(queue->user_io_addr); @@ -162,76 +150,15 @@ static void init_user_io_pages(struct kbase_queue *queue) output_addr[CS_ACTIVE/4] = 0; } -/* Map the input/output pages in the shared interface segment of MCU firmware - * address space. - */ -static int gpu_mmap_user_io_pages(struct kbase_device *kbdev, - struct tagged_addr *phys, struct kbase_va_region *reg) -{ - unsigned long mem_flags = KBASE_REG_GPU_RD; - const size_t num_pages = 2; - int ret; - - /* Calls to this function are inherently asynchronous, with respect to - * MMU operations. 
- */ - const enum kbase_caller_mmu_sync_info mmu_sync_info = CALLER_MMU_ASYNC; - - if (kbdev->system_coherency == COHERENCY_NONE) { - mem_flags |= - KBASE_REG_MEMATTR_INDEX(AS_MEMATTR_INDEX_NON_CACHEABLE); - } else { - mem_flags |= KBASE_REG_SHARE_BOTH | - KBASE_REG_MEMATTR_INDEX(AS_MEMATTR_INDEX_SHARED); - } - - mutex_lock(&kbdev->csf.reg_lock); - ret = kbase_add_va_region_rbtree(kbdev, reg, 0, num_pages, 1); - reg->flags &= ~KBASE_REG_FREE; - mutex_unlock(&kbdev->csf.reg_lock); - - if (ret) - return ret; - - /* Map input page */ - ret = kbase_mmu_insert_pages(kbdev, &kbdev->csf.mcu_mmu, reg->start_pfn, - &phys[0], 1, mem_flags, MCU_AS_NR, - KBASE_MEM_GROUP_CSF_IO, mmu_sync_info); - if (ret) - goto bad_insert; - - /* Map output page, it needs rw access */ - mem_flags |= KBASE_REG_GPU_WR; - ret = kbase_mmu_insert_pages(kbdev, &kbdev->csf.mcu_mmu, - reg->start_pfn + 1, &phys[1], 1, mem_flags, - MCU_AS_NR, KBASE_MEM_GROUP_CSF_IO, - mmu_sync_info); - if (ret) - goto bad_insert_output_page; - - return 0; - -bad_insert_output_page: - kbase_mmu_teardown_pages(kbdev, &kbdev->csf.mcu_mmu, reg->start_pfn, phys, 1, MCU_AS_NR); -bad_insert: - mutex_lock(&kbdev->csf.reg_lock); - kbase_remove_va_region(kbdev, reg); - mutex_unlock(&kbdev->csf.reg_lock); - - return ret; -} - static void kernel_unmap_user_io_pages(struct kbase_context *kctx, struct kbase_queue *queue) { - const size_t num_pages = 2; - kbase_gpu_vm_lock(kctx); vunmap(queue->user_io_addr); - WARN_ON(num_pages > atomic_read(&kctx->permanent_mapped_pages)); - atomic_sub(num_pages, &kctx->permanent_mapped_pages); + WARN_ON(atomic_read(&kctx->permanent_mapped_pages) < KBASEP_NUM_CS_USER_IO_PAGES); + atomic_sub(KBASEP_NUM_CS_USER_IO_PAGES, &kctx->permanent_mapped_pages); kbase_gpu_vm_unlock(kctx); } @@ -312,63 +239,56 @@ static void release_queue(struct kbase_queue *queue); * If an explicit or implicit unbind was missed by the userspace then the * mapping will persist. 
On process exit kernel itself will remove the mapping. */ -static void kbase_csf_free_command_stream_user_pages(struct kbase_context *kctx, - struct kbase_queue *queue) +void kbase_csf_free_command_stream_user_pages(struct kbase_context *kctx, struct kbase_queue *queue) { - const size_t num_pages = 2; - - gpu_munmap_user_io_pages(kctx, queue->reg, &queue->phys[0]); kernel_unmap_user_io_pages(kctx, queue); kbase_mem_pool_free_pages( &kctx->mem_pools.small[KBASE_MEM_GROUP_CSF_IO], - num_pages, queue->phys, true, false); + KBASEP_NUM_CS_USER_IO_PAGES, queue->phys, true, false); + kbase_process_page_usage_dec(kctx, KBASEP_NUM_CS_USER_IO_PAGES); - kfree(queue->reg); - queue->reg = NULL; + /* The user_io_gpu_va should have been unmapped inside the scheduler */ + WARN_ONCE(queue->user_io_gpu_va, "Userio pages appears still have mapping"); /* If the queue has already been terminated by userspace * then the ref count for queue object will drop to 0 here. */ release_queue(queue); } +KBASE_EXPORT_TEST_API(kbase_csf_free_command_stream_user_pages); -int kbase_csf_alloc_command_stream_user_pages(struct kbase_context *kctx, - struct kbase_queue *queue) +int kbase_csf_alloc_command_stream_user_pages(struct kbase_context *kctx, struct kbase_queue *queue) { struct kbase_device *kbdev = kctx->kbdev; - struct kbase_va_region *reg; - const size_t num_pages = 2; int ret; lockdep_assert_held(&kctx->csf.lock); - reg = kbase_alloc_free_region(&kctx->kbdev->csf.shared_reg_rbtree, 0, - num_pages, KBASE_REG_ZONE_MCU_SHARED); - if (!reg) - return -ENOMEM; - - ret = kbase_mem_pool_alloc_pages(&kctx->mem_pools.small[KBASE_MEM_GROUP_CSF_IO], num_pages, + ret = kbase_mem_pool_alloc_pages(&kctx->mem_pools.small[KBASE_MEM_GROUP_CSF_IO], + KBASEP_NUM_CS_USER_IO_PAGES, queue->phys, false); - - if (ret != num_pages) - goto phys_alloc_failed; + if (ret != KBASEP_NUM_CS_USER_IO_PAGES) { + /* Marking both the phys to zero for indicating there is no phys allocated */ + queue->phys[0].tagged_addr = 0; + 
queue->phys[1].tagged_addr = 0; + return -ENOMEM; + } ret = kernel_map_user_io_pages(kctx, queue); if (ret) goto kernel_map_failed; + kbase_process_page_usage_inc(kctx, KBASEP_NUM_CS_USER_IO_PAGES); init_user_io_pages(queue); - ret = gpu_mmap_user_io_pages(kctx->kbdev, queue->phys, reg); - if (ret) - goto gpu_mmap_failed; - - queue->reg = reg; + /* user_io_gpu_va is only mapped when scheduler decides to put the queue + * on slot at runtime. Initialize it to 0, signalling no mapping. + */ + queue->user_io_gpu_va = 0; mutex_lock(&kbdev->csf.reg_lock); - if (kbdev->csf.db_file_offsets > - (U32_MAX - BASEP_QUEUE_NR_MMAP_USER_PAGES + 1)) + if (kbdev->csf.db_file_offsets > (U32_MAX - BASEP_QUEUE_NR_MMAP_USER_PAGES + 1)) kbdev->csf.db_file_offsets = 0; queue->db_file_offset = kbdev->csf.db_file_offsets; @@ -388,19 +308,16 @@ int kbase_csf_alloc_command_stream_user_pages(struct kbase_context *kctx, return 0; -gpu_mmap_failed: - kernel_unmap_user_io_pages(kctx, queue); - kernel_map_failed: - kbase_mem_pool_free_pages( - &kctx->mem_pools.small[KBASE_MEM_GROUP_CSF_IO], - num_pages, queue->phys, false, false); + kbase_mem_pool_free_pages(&kctx->mem_pools.small[KBASE_MEM_GROUP_CSF_IO], + KBASEP_NUM_CS_USER_IO_PAGES, queue->phys, false, false); + /* Marking both the phys to zero for indicating there is no phys allocated */ + queue->phys[0].tagged_addr = 0; + queue->phys[1].tagged_addr = 0; -phys_alloc_failed: - kfree(reg); - - return -ENOMEM; + return ret; } +KBASE_EXPORT_TEST_API(kbase_csf_alloc_command_stream_user_pages); static struct kbase_queue_group *find_queue_group(struct kbase_context *kctx, u8 group_handle) @@ -418,6 +335,12 @@ static struct kbase_queue_group *find_queue_group(struct kbase_context *kctx, return NULL; } +struct kbase_queue_group *kbase_csf_find_queue_group(struct kbase_context *kctx, u8 group_handle) +{ + return find_queue_group(kctx, group_handle); +} +KBASE_EXPORT_TEST_API(kbase_csf_find_queue_group); + int kbase_csf_queue_group_handle_is_valid(struct 
kbase_context *kctx, u8 group_handle) { @@ -468,6 +391,17 @@ static void release_queue(struct kbase_queue *queue) "Remove any pending command queue fatal from ctx %d_%d", queue->kctx->tgid, queue->kctx->id); kbase_csf_event_remove_error(queue->kctx, &queue->error); + + /* After this the Userspace would be able to free the + * memory for GPU queue. In case the Userspace missed + * terminating the queue, the cleanup will happen on + * context termination where tear down of region tracker + * would free up the GPU queue memory. + */ + kbase_gpu_vm_lock(queue->kctx); + kbase_va_region_no_user_free_put(queue->kctx, queue->queue_reg); + kbase_gpu_vm_unlock(queue->kctx); + kfree(queue); } } @@ -521,7 +455,8 @@ static int csf_queue_register_internal(struct kbase_context *kctx, region = kbase_region_tracker_find_region_enclosing_address(kctx, queue_addr); - if (kbase_is_region_invalid_or_free(region)) { + if (kbase_is_region_invalid_or_free(region) || kbase_is_region_shrinkable(region) || + region->gpu_alloc->type != KBASE_MEM_TYPE_NATIVE) { ret = -ENOENT; goto out_unlock_vm; } @@ -570,7 +505,7 @@ static int csf_queue_register_internal(struct kbase_context *kctx, queue->kctx = kctx; queue->base_addr = queue_addr; - queue->queue_reg = region; + queue->queue_reg = kbase_va_region_no_user_free_get(kctx, region); queue->size = (queue_size << PAGE_SHIFT); queue->csi_index = KBASEP_IF_NR_INVALID; queue->enabled = false; @@ -608,7 +543,6 @@ static int csf_queue_register_internal(struct kbase_context *kctx, queue->extract_ofs = 0; - region->flags |= KBASE_REG_NO_USER_FREE; region->user_data = queue; /* Initialize the cs_trace configuration parameters, When buffer_size @@ -702,16 +636,8 @@ void kbase_csf_queue_terminate(struct kbase_context *kctx, unbind_queue(kctx, queue); kbase_gpu_vm_lock(kctx); - if (!WARN_ON(!queue->queue_reg)) { - /* After this the Userspace would be able to free the - * memory for GPU queue. 
In case the Userspace missed - terminating the queue, the cleanup will happen on - context termination where tear down of region tracker - would free up the GPU queue memory. - */ - queue->queue_reg->flags &= ~KBASE_REG_NO_USER_FREE; + if (!WARN_ON(!queue->queue_reg)) queue->queue_reg->user_data = NULL; - } kbase_gpu_vm_unlock(kctx); release_queue(queue); @@ -875,6 +801,15 @@ void kbase_csf_ring_csg_slots_doorbell(struct kbase_device *kbdev, if (WARN_ON(slot_bitmap > allowed_bitmap)) return; + /* The access to GLB_DB_REQ/ACK needs to be ordered with respect to CSG_REQ/ACK and + * CSG_DB_REQ/ACK to avoid a scenario where a CSI request overlaps with a CSG request + * or 2 CSI requests overlap and FW ends up missing the 2nd request. + * Memory barrier is required, both on Host and FW side, to guarantee the ordering. + * + * 'osh' is used as CPU and GPU would be in the same Outer shareable domain. + */ + dmb(osh); + value = kbase_csf_firmware_global_output(global_iface, GLB_DB_ACK); value ^= slot_bitmap; kbase_csf_firmware_global_input_mask(global_iface, GLB_DB_REQ, value, @@ -913,6 +848,14 @@ void kbase_csf_ring_cs_kernel_doorbell(struct kbase_device *kbdev, WARN_ON(csi_index >= ginfo->stream_num)) return; + /* The access to CSG_DB_REQ/ACK needs to be ordered with respect to + * CS_REQ/ACK to avoid a scenario where CSG_DB_REQ/ACK becomes visible to + * FW before CS_REQ/ACK is set. + * + * 'osh' is used as CPU and GPU would be in the same outer shareable domain.
+ */ + dmb(osh); + value = kbase_csf_firmware_csg_output(ginfo, CSG_DB_ACK); value ^= (1 << csi_index); kbase_csf_firmware_csg_input_mask(ginfo, CSG_DB_REQ, value, @@ -930,6 +873,8 @@ int kbase_csf_queue_kick(struct kbase_context *kctx, struct kbase_va_region *region; int err = 0; + KBASE_TLSTREAM_TL_KBASE_GPUCMDQUEUE_KICK(kbdev, kctx->id, kick->buffer_gpu_addr); + /* GPU work submission happening asynchronously to prevent the contention with * scheduler lock and as the result blocking application thread. For this reason, * the vm_lock is used here to get the reference to the queue based on its buffer_gpu_addr @@ -1018,6 +963,15 @@ static void unbind_queue(struct kbase_context *kctx, struct kbase_queue *queue) } } +static bool kbase_csf_queue_phys_allocated(struct kbase_queue *queue) +{ + /* The queue's phys are zeroed when allocation fails. Both of them being + * zero is an impossible condition for a successfully allocated set of phys pages. + */ + + return (queue->phys[0].tagged_addr | queue->phys[1].tagged_addr); +} + void kbase_csf_queue_unbind(struct kbase_queue *queue, bool process_exit) { struct kbase_context *kctx = queue->kctx; @@ -1043,8 +997,8 @@ void kbase_csf_queue_unbind(struct kbase_queue *queue, bool process_exit) unbind_queue(kctx, queue); } - /* Free the resources, if allocated for this queue. */ - if (queue->reg) + /* Free the resources, if allocated phys for this queue */ + if (kbase_csf_queue_phys_allocated(queue)) kbase_csf_free_command_stream_user_pages(kctx, queue); } @@ -1057,8 +1011,8 @@ void kbase_csf_queue_unbind_stopped(struct kbase_queue *queue) WARN_ON(queue->bind_state == KBASE_CSF_QUEUE_BOUND); unbind_stopped_queue(kctx, queue); - /* Free the resources, if allocated for this queue.
*/ - if (queue->reg) + /* Free the resources, if allocated phys for this queue */ + if (kbase_csf_queue_phys_allocated(queue)) kbase_csf_free_command_stream_user_pages(kctx, queue); } @@ -1121,167 +1075,39 @@ static bool iface_has_enough_streams(struct kbase_device *const kbdev, * @kctx: Pointer to kbase context where the queue group is created at * @s_buf: Pointer to suspend buffer that is attached to queue group * - * Return: 0 if suspend buffer is successfully allocated and reflected to GPU - * MMU page table. Otherwise -ENOMEM. + * Return: 0 if phy-pages for the suspend buffer is successfully allocated. + * Otherwise -ENOMEM or error code. */ static int create_normal_suspend_buffer(struct kbase_context *const kctx, struct kbase_normal_suspend_buffer *s_buf) { - struct kbase_va_region *reg = NULL; - const unsigned long mem_flags = KBASE_REG_GPU_RD | KBASE_REG_GPU_WR; const size_t nr_pages = PFN_UP(kctx->kbdev->csf.global_iface.groups[0].suspend_size); - int err = 0; - - /* Calls to this function are inherently asynchronous, with respect to - * MMU operations. - */ - const enum kbase_caller_mmu_sync_info mmu_sync_info = CALLER_MMU_ASYNC; + int err; lockdep_assert_held(&kctx->csf.lock); - /* Allocate and initialize Region Object */ - reg = kbase_alloc_free_region(&kctx->kbdev->csf.shared_reg_rbtree, 0, - nr_pages, KBASE_REG_ZONE_MCU_SHARED); - - if (!reg) - return -ENOMEM; + /* The suspend buffer's mapping address is valid only when the CSG is to + * run on slot, initializing it 0, signalling the buffer is not mapped. 
+ */ + s_buf->gpu_va = 0; s_buf->phy = kcalloc(nr_pages, sizeof(*s_buf->phy), GFP_KERNEL); - if (!s_buf->phy) { - err = -ENOMEM; - goto phy_alloc_failed; - } + if (!s_buf->phy) + return -ENOMEM; /* Get physical page for a normal suspend buffer */ err = kbase_mem_pool_alloc_pages(&kctx->mem_pools.small[KBASE_MEM_GROUP_CSF_FW], nr_pages, &s_buf->phy[0], false); - if (err < 0) - goto phy_pages_alloc_failed; - - /* Insert Region Object into rbtree and make virtual address available - * to map it to physical page - */ - mutex_lock(&kctx->kbdev->csf.reg_lock); - err = kbase_add_va_region_rbtree(kctx->kbdev, reg, 0, nr_pages, 1); - reg->flags &= ~KBASE_REG_FREE; - mutex_unlock(&kctx->kbdev->csf.reg_lock); - - if (err) - goto add_va_region_failed; - - /* Update MMU table */ - err = kbase_mmu_insert_pages(kctx->kbdev, &kctx->kbdev->csf.mcu_mmu, - reg->start_pfn, &s_buf->phy[0], nr_pages, - mem_flags, MCU_AS_NR, - KBASE_MEM_GROUP_CSF_FW, mmu_sync_info); - if (err) - goto mmu_insert_failed; - - s_buf->reg = reg; - - return 0; - -mmu_insert_failed: - mutex_lock(&kctx->kbdev->csf.reg_lock); - kbase_remove_va_region(kctx->kbdev, reg); - mutex_unlock(&kctx->kbdev->csf.reg_lock); - -add_va_region_failed: - kbase_mem_pool_free_pages( - &kctx->mem_pools.small[KBASE_MEM_GROUP_CSF_FW], nr_pages, - &s_buf->phy[0], false, false); - -phy_pages_alloc_failed: - kfree(s_buf->phy); -phy_alloc_failed: - kfree(reg); - - return err; -} - -/** - * create_protected_suspend_buffer() - Create protected-mode suspend buffer - * per queue group - * - * @kbdev: Instance of a GPU platform device that implements a CSF interface. - * @s_buf: Pointer to suspend buffer that is attached to queue group - * - * Return: 0 if suspend buffer is successfully allocated and reflected to GPU - * MMU page table. Otherwise -ENOMEM. 
- */ -static int create_protected_suspend_buffer(struct kbase_device *const kbdev, - struct kbase_protected_suspend_buffer *s_buf) -{ - struct kbase_va_region *reg = NULL; - struct tagged_addr *phys = NULL; - const unsigned long mem_flags = KBASE_REG_GPU_RD | KBASE_REG_GPU_WR; - const size_t nr_pages = - PFN_UP(kbdev->csf.global_iface.groups[0].suspend_size); - int err = 0; - - /* Calls to this function are inherently asynchronous, with respect to - * MMU operations. - */ - const enum kbase_caller_mmu_sync_info mmu_sync_info = CALLER_MMU_ASYNC; - - /* Allocate and initialize Region Object */ - reg = kbase_alloc_free_region(&kbdev->csf.shared_reg_rbtree, 0, - nr_pages, KBASE_REG_ZONE_MCU_SHARED); - - if (!reg) - return -ENOMEM; - - phys = kcalloc(nr_pages, sizeof(*phys), GFP_KERNEL); - if (!phys) { - err = -ENOMEM; - goto phy_alloc_failed; + if (err < 0) { + kfree(s_buf->phy); + return err; } - s_buf->pma = kbase_csf_protected_memory_alloc(kbdev, phys, - nr_pages, true); - if (s_buf->pma == NULL) { - err = -ENOMEM; - goto pma_alloc_failed; - } - - /* Insert Region Object into rbtree and make virtual address available - * to map it to physical page - */ - mutex_lock(&kbdev->csf.reg_lock); - err = kbase_add_va_region_rbtree(kbdev, reg, 0, nr_pages, 1); - reg->flags &= ~KBASE_REG_FREE; - mutex_unlock(&kbdev->csf.reg_lock); - - if (err) - goto add_va_region_failed; - - /* Update MMU table */ - err = kbase_mmu_insert_pages(kbdev, &kbdev->csf.mcu_mmu, reg->start_pfn, - phys, nr_pages, mem_flags, MCU_AS_NR, - KBASE_MEM_GROUP_CSF_FW, mmu_sync_info); - if (err) - goto mmu_insert_failed; - - s_buf->reg = reg; - kfree(phys); + kbase_process_page_usage_inc(kctx, nr_pages); return 0; - -mmu_insert_failed: - mutex_lock(&kbdev->csf.reg_lock); - kbase_remove_va_region(kbdev, reg); - mutex_unlock(&kbdev->csf.reg_lock); - -add_va_region_failed: - kbase_csf_protected_memory_free(kbdev, s_buf->pma, nr_pages, true); -pma_alloc_failed: - kfree(phys); -phy_alloc_failed: - kfree(reg); - - 
return err; } static void timer_event_worker(struct work_struct *data); @@ -1302,26 +1128,17 @@ static void term_normal_suspend_buffer(struct kbase_context *const kctx, static int create_suspend_buffers(struct kbase_context *const kctx, struct kbase_queue_group * const group) { - int err = 0; - if (create_normal_suspend_buffer(kctx, &group->normal_suspend_buf)) { dev_err(kctx->kbdev->dev, "Failed to create normal suspend buffer\n"); return -ENOMEM; } - if (kctx->kbdev->csf.pma_dev) { - err = create_protected_suspend_buffer(kctx->kbdev, - &group->protected_suspend_buf); - if (err) { - term_normal_suspend_buffer(kctx, - &group->normal_suspend_buf); - dev_err(kctx->kbdev->dev, "Failed to create protected suspend buffer\n"); - } - } else { - group->protected_suspend_buf.reg = NULL; - } + /* Protected suspend buffer, runtime binding so just initialize it */ + group->protected_suspend_buf.gpu_va = 0; + group->protected_suspend_buf.pma = NULL; + group->protected_suspend_buf.alloc_retries = 0; - return err; + return 0; } /** @@ -1387,6 +1204,9 @@ static int create_queue_group(struct kbase_context *const kctx, group->cs_unrecoverable = false; group->reevaluate_idle_status = false; + group->csg_reg = NULL; + group->csg_reg_bind_retries = 0; + group->dvs_buf = create->in.dvs_buf; #if IS_ENABLED(CONFIG_DEBUG_FS) @@ -1518,65 +1338,39 @@ int kbase_csf_queue_group_create(struct kbase_context *const kctx, * @s_buf: Pointer to queue group suspend buffer to be freed */ static void term_normal_suspend_buffer(struct kbase_context *const kctx, - struct kbase_normal_suspend_buffer *s_buf) + struct kbase_normal_suspend_buffer *s_buf) { - const size_t nr_pages = - PFN_UP(kctx->kbdev->csf.global_iface.groups[0].suspend_size); + const size_t nr_pages = PFN_UP(kctx->kbdev->csf.global_iface.groups[0].suspend_size); lockdep_assert_held(&kctx->csf.lock); - WARN_ON(kbase_mmu_teardown_pages(kctx->kbdev, &kctx->kbdev->csf.mcu_mmu, - s_buf->reg->start_pfn, s_buf->phy, nr_pages, MCU_AS_NR)); + /* 
The group should not have a bind remaining on any suspend buf region */ + WARN_ONCE(s_buf->gpu_va, "Suspend buffer address should be 0 at termination"); - WARN_ON(s_buf->reg->flags & KBASE_REG_FREE); - - mutex_lock(&kctx->kbdev->csf.reg_lock); - kbase_remove_va_region(kctx->kbdev, s_buf->reg); - mutex_unlock(&kctx->kbdev->csf.reg_lock); - - kbase_mem_pool_free_pages( - &kctx->mem_pools.small[KBASE_MEM_GROUP_CSF_FW], - nr_pages, &s_buf->phy[0], false, false); + kbase_mem_pool_free_pages(&kctx->mem_pools.small[KBASE_MEM_GROUP_CSF_FW], nr_pages, + &s_buf->phy[0], false, false); + kbase_process_page_usage_dec(kctx, nr_pages); kfree(s_buf->phy); s_buf->phy = NULL; - kfree(s_buf->reg); - s_buf->reg = NULL; } /** - * term_protected_suspend_buffer() - Free normal-mode suspend buffer of + * term_protected_suspend_buffer() - Free protected-mode suspend buffer of * queue group * * @kbdev: Instance of a GPU platform device that implements a CSF interface. - * @s_buf: Pointer to queue group suspend buffer to be freed + * @sbuf: Pointer to queue group suspend buffer to be freed */ static void term_protected_suspend_buffer(struct kbase_device *const kbdev, - struct kbase_protected_suspend_buffer *s_buf) + struct kbase_protected_suspend_buffer *sbuf) { - const size_t nr_pages = - PFN_UP(kbdev->csf.global_iface.groups[0].suspend_size); - struct tagged_addr *phys = kmalloc(sizeof(*phys) * nr_pages, GFP_KERNEL); - size_t i = 0; - - for (i = 0; phys && i < nr_pages; i++) - phys[i] = as_tagged(s_buf->pma[i]->pa); - - WARN_ON(kbase_mmu_teardown_pages(kbdev, &kbdev->csf.mcu_mmu, s_buf->reg->start_pfn, phys, - nr_pages, MCU_AS_NR)); - - kfree(phys); - - WARN_ON(s_buf->reg->flags & KBASE_REG_FREE); - - mutex_lock(&kbdev->csf.reg_lock); - kbase_remove_va_region(kbdev, s_buf->reg); - mutex_unlock(&kbdev->csf.reg_lock); - - kbase_csf_protected_memory_free(kbdev, s_buf->pma, nr_pages, true); - s_buf->pma = NULL; - kfree(s_buf->reg); - s_buf->reg = NULL; + WARN_ONCE(sbuf->gpu_va, "Suspend buf 
should have been unmapped inside scheduler!"); + if (sbuf->pma) { + const size_t nr_pages = PFN_UP(kbdev->csf.global_iface.groups[0].suspend_size); + kbase_csf_protected_memory_free(kbdev, sbuf->pma, nr_pages, true); + sbuf->pma = NULL; + } } void kbase_csf_term_descheduled_queue_group(struct kbase_queue_group *group) @@ -1743,6 +1537,7 @@ void kbase_csf_queue_group_terminate(struct kbase_context *kctx, kfree(group); } +KBASE_EXPORT_TEST_API(kbase_csf_queue_group_terminate); int kbase_csf_queue_group_suspend(struct kbase_context *kctx, struct kbase_suspend_copy_buffer *sus_buf, @@ -2022,12 +1817,10 @@ void kbase_csf_ctx_term(struct kbase_context *kctx) * registered. */ #if (KERNEL_VERSION(4, 11, 0) > LINUX_VERSION_CODE) - if (atomic_read(&queue->refcount) != 1) + WARN_ON(atomic_read(&queue->refcount) != 1); #else - if (refcount_read(&queue->refcount) != 1) + WARN_ON(refcount_read(&queue->refcount) != 1); #endif - dev_warn(kctx->kbdev->dev, - "Releasing queue with incorrect refcounting!\n"); list_del_init(&queue->link); release_queue(queue); } @@ -2378,6 +2171,81 @@ static void handle_progress_timer_event(struct kbase_queue_group *const group) queue_work(group->kctx->csf.wq, &group->timer_event_work); } +/** + * alloc_grp_protected_suspend_buffer_pages() - Allocate physical pages from the protected + * memory for the protected mode suspend buffer. + * @group: Pointer to the GPU queue group. + * + * Return: 0 if suspend buffer allocation is successful or if its already allocated, otherwise + * negative error value. 
+ */ +static int alloc_grp_protected_suspend_buffer_pages(struct kbase_queue_group *const group) +{ + struct kbase_device *const kbdev = group->kctx->kbdev; + struct kbase_context *kctx = group->kctx; + struct tagged_addr *phys = NULL; + struct kbase_protected_suspend_buffer *sbuf = &group->protected_suspend_buf; + size_t nr_pages; + int err = 0; + + if (likely(sbuf->pma)) + return 0; + + nr_pages = PFN_UP(kbdev->csf.global_iface.groups[0].suspend_size); + phys = kcalloc(nr_pages, sizeof(*phys), GFP_KERNEL); + if (unlikely(!phys)) { + err = -ENOMEM; + goto phys_free; + } + + mutex_lock(&kctx->csf.lock); + kbase_csf_scheduler_lock(kbdev); + + if (unlikely(!group->csg_reg)) { + /* The only chance of the bound csg_reg is removed from the group is + * that it has been put off slot by the scheduler and the csg_reg resource + * is contended by other groups. In this case, it needs another occasion for + * mapping the pma, which needs a bound csg_reg. Since the group is already + * off-slot, returning no error is harmless as the scheduler, when place the + * group back on-slot again would do the required MMU map operation on the + * allocated and retained pma. 
+ */ + WARN_ON(group->csg_nr >= 0); + dev_dbg(kbdev->dev, "No bound csg_reg for group_%d_%d_%d to enter protected mode", + group->kctx->tgid, group->kctx->id, group->handle); + goto unlock; + } + + /* Allocate the protected mode pages */ + sbuf->pma = kbase_csf_protected_memory_alloc(kbdev, phys, nr_pages, true); + if (unlikely(!sbuf->pma)) { + err = -ENOMEM; + goto unlock; + } + + /* Map the bound susp_reg to the just allocated pma pages */ + err = kbase_csf_mcu_shared_group_update_pmode_map(kbdev, group); + +unlock: + kbase_csf_scheduler_unlock(kbdev); + mutex_unlock(&kctx->csf.lock); +phys_free: + kfree(phys); + return err; +} + +static void report_group_fatal_error(struct kbase_queue_group *const group) +{ + struct base_gpu_queue_group_error const + err_payload = { .error_type = BASE_GPU_QUEUE_GROUP_ERROR_FATAL, + .payload = { .fatal_group = { + .status = GPU_EXCEPTION_TYPE_SW_FAULT_0, + } } }; + + kbase_csf_add_group_fatal_error(group, &err_payload); + kbase_event_wakeup(group->kctx); +} + /** * protm_event_worker - Protected mode switch request event handler * called from a workqueue. 
@@ -2390,10 +2258,26 @@ static void protm_event_worker(struct work_struct *data) { struct kbase_queue_group *const group = container_of(data, struct kbase_queue_group, protm_event_work); + struct kbase_protected_suspend_buffer *sbuf = &group->protected_suspend_buf; + int err = 0; KBASE_KTRACE_ADD_CSF_GRP(group->kctx->kbdev, PROTM_EVENT_WORKER_START, group, 0u); - kbase_csf_scheduler_group_protm_enter(group); + + err = alloc_grp_protected_suspend_buffer_pages(group); + if (!err) { + kbase_csf_scheduler_group_protm_enter(group); + } else if (err == -ENOMEM && sbuf->alloc_retries <= PROTM_ALLOC_MAX_RETRIES) { + sbuf->alloc_retries++; + /* try again to allocate pages */ + queue_work(group->kctx->csf.wq, &group->protm_event_work); + } else if (sbuf->alloc_retries >= PROTM_ALLOC_MAX_RETRIES || err != -ENOMEM) { + dev_err(group->kctx->kbdev->dev, + "Failed to allocate physical pages for Protected mode suspend buffer for the group %d of context %d_%d", + group->handle, group->kctx->tgid, group->kctx->id); + report_group_fatal_error(group); + } + KBASE_KTRACE_ADD_CSF_GRP(group->kctx->kbdev, PROTM_EVENT_WORKER_END, group, 0u); } @@ -2517,7 +2401,10 @@ static void cs_error_worker(struct work_struct *const data) struct kbase_queue_group *group; u8 group_handle; bool reset_prevented = false; - int err = kbase_reset_gpu_prevent_and_wait(kbdev); + int err; + + kbase_debug_csf_fault_wait_completion(kbdev); + err = kbase_reset_gpu_prevent_and_wait(kbdev); if (err) dev_warn( @@ -2526,7 +2413,6 @@ static void cs_error_worker(struct work_struct *const data) else reset_prevented = true; - kbase_debug_csf_fault_wait_completion(kbdev); mutex_lock(&kctx->csf.lock); group = get_bound_queue_group(queue); @@ -2724,12 +2610,17 @@ static void process_cs_interrupts(struct kbase_queue_group *const group, get_queue(queue); KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, CSI_INTERRUPT_TILER_OOM, group, queue, cs_req ^ cs_ack); - if (WARN_ON(!queue_work(wq, &queue->oom_event_work))) { + if (!queue_work(wq, 
&queue->oom_event_work)) { /* The work item shall not have been * already queued, there can be only * one pending OoM event for a * queue. */ + dev_warn( + kbdev->dev, + "Tiler OOM work pending: queue %d group %d (ctx %d_%d)", + queue->csi_index, group->handle, queue->kctx->tgid, + queue->kctx->id); release_queue(queue); } } @@ -2760,6 +2651,9 @@ static void process_cs_interrupts(struct kbase_queue_group *const group, track->protm_grp = group; } + if (!group->protected_suspend_buf.pma) + queue_work(group->kctx->csf.wq, &group->protm_event_work); + if (test_bit(group->csg_nr, scheduler->csg_slots_idle_mask)) { clear_bit(group->csg_nr, scheduler->csg_slots_idle_mask); @@ -2801,8 +2695,6 @@ static void process_csg_interrupts(struct kbase_device *const kbdev, int const c if (WARN_ON(csg_nr >= kbdev->csf.global_iface.group_num)) return; - KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_INTERRUPT_PROCESS_START, group, csg_nr); - ginfo = &kbdev->csf.global_iface.groups[csg_nr]; req = kbase_csf_firmware_csg_input_read(ginfo, CSG_REQ); ack = kbase_csf_firmware_csg_output(ginfo, CSG_ACK); @@ -2811,7 +2703,7 @@ static void process_csg_interrupts(struct kbase_device *const kbdev, int const c /* There may not be any pending CSG/CS interrupts to process */ if ((req == ack) && (irqreq == irqack)) - goto out; + return; /* Immediately set IRQ_ACK bits to be same as the IRQ_REQ bits before * examining the CS_ACK & CS_REQ bits. This would ensure that Host @@ -2832,10 +2724,12 @@ static void process_csg_interrupts(struct kbase_device *const kbdev, int const c * slot scheduler spinlock is required. 
*/ if (!group) - goto out; + return; if (WARN_ON(kbase_csf_scheduler_group_get_slot_locked(group) != csg_nr)) - goto out; + return; + + KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_INTERRUPT_PROCESS_START, group, csg_nr); if ((req ^ ack) & CSG_REQ_SYNC_UPDATE_MASK) { kbase_csf_firmware_csg_input_mask(ginfo, @@ -2897,8 +2791,6 @@ static void process_csg_interrupts(struct kbase_device *const kbdev, int const c process_cs_interrupts(group, ginfo, irqreq, irqack, track); -out: - /* group may still be NULL here */ KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_INTERRUPT_PROCESS_END, group, ((u64)req ^ ack) | (((u64)irqreq ^ irqack) << 32)); } @@ -3058,6 +2950,10 @@ static inline void process_protm_exit(struct kbase_device *kbdev, u32 glb_ack) kbase_ipa_control_protm_exited(kbdev); kbase_hwcnt_backend_csf_protm_exited(&kbdev->hwcnt_gpu_iface); } + +#if IS_ENABLED(CONFIG_MALI_CORESIGHT) + kbase_debug_coresight_csf_enable_pmode_exit(kbdev); +#endif /* IS_ENABLED(CONFIG_MALI_CORESIGHT) */ } static inline void process_tracked_info_for_protm(struct kbase_device *kbdev, diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf.h b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf.h index b2677405761f..9fbc932b7905 100644 --- a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf.h +++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2018-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2018-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -40,12 +40,15 @@ */ #define KBASEP_USER_DB_NR_INVALID ((s8)-1) +/* Number of pages used for GPU command queue's User input & output data */ +#define KBASEP_NUM_CS_USER_IO_PAGES (2) + /* Indicates an invalid value for the scan out sequence number, used to * signify there is no group that has protected mode execution pending. 
*/ #define KBASEP_TICK_PROTM_PEND_SCAN_SEQ_NR_INVALID (U32_MAX) -#define FIRMWARE_IDLE_HYSTERESIS_TIME_MS (10) /* Default 10 milliseconds */ +#define FIRMWARE_IDLE_HYSTERESIS_TIME_USEC (10000) /* Default 10 milliseconds */ /* Idle hysteresis time can be scaled down when GPU sleep feature is used */ #define FIRMWARE_IDLE_HYSTERESIS_GPU_SLEEP_SCALER (5) @@ -123,6 +126,25 @@ int kbase_csf_queue_register_ex(struct kbase_context *kctx, void kbase_csf_queue_terminate(struct kbase_context *kctx, struct kbase_ioctl_cs_queue_terminate *term); +/** + * kbase_csf_free_command_stream_user_pages() - Free the resources allocated + * for a queue at the time of bind. + * + * @kctx: Address of the kbase context within which the queue was created. + * @queue: Pointer to the queue to be unlinked. + * + * This function will free the pair of physical pages allocated for a GPU + * command queue, and also release the hardware doorbell page, that were mapped + * into the process address space to enable direct submission of commands to + * the hardware. Also releases the reference taken on the queue when the mapping + * was created. + * + * If an explicit or implicit unbind was missed by the userspace then the + * mapping will persist. On process exit kernel itself will remove the mapping. + */ +void kbase_csf_free_command_stream_user_pages(struct kbase_context *kctx, + struct kbase_queue *queue); + /** * kbase_csf_alloc_command_stream_user_pages - Allocate resources for a * GPU command queue. @@ -185,6 +207,20 @@ void kbase_csf_queue_unbind_stopped(struct kbase_queue *queue); int kbase_csf_queue_kick(struct kbase_context *kctx, struct kbase_ioctl_cs_queue_kick *kick); +/** + * kbase_csf_find_queue_group - Find the queue group corresponding + * to the indicated handle. + * + * @kctx: The kbase context under which the queue group exists. + * @group_handle: Handle for the group which uniquely identifies it within + * the context with which it was created.
+ * + * This function is used to find the queue group when passed a handle. + * + * Return: Pointer to a queue group on success, NULL on failure + */ +struct kbase_queue_group *kbase_csf_find_queue_group(struct kbase_context *kctx, u8 group_handle); + /** * kbase_csf_queue_group_handle_is_valid - Find if the given queue group handle * is valid. @@ -464,4 +500,5 @@ static inline u64 kbase_csf_ktrace_gpu_cycle_cnt(struct kbase_device *kbdev) return 0; #endif } + #endif /* _KBASE_CSF_H_ */ diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_csg_debugfs.c b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_csg_debugfs.c index 3afbe6d4005e..e96044ae6239 100644 --- a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_csg_debugfs.c +++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_csg_debugfs.c @@ -100,7 +100,7 @@ static void wait_csg_slots_status_update_finish(struct kbase_device *kbdev, } } -static void update_active_groups_status(struct kbase_device *kbdev, struct seq_file *file) +void kbase_csf_debugfs_update_active_groups_status(struct kbase_device *kbdev) { u32 max_csg_slots = kbdev->csf.global_iface.group_num; DECLARE_BITMAP(used_csgs, MAX_SUPPORTED_CSGS) = { 0 }; @@ -116,6 +116,8 @@ static void update_active_groups_status(struct kbase_device *kbdev, struct seq_f * status of all on-slot groups when MCU sleep request is sent to it. */ if (kbdev->csf.scheduler.state == SCHED_SLEEPING) { + /* Wait for the MCU sleep request to complete. 
*/ + kbase_pm_wait_for_desired_state(kbdev); bitmap_copy(csg_slots_status_updated, kbdev->csf.scheduler.csg_inuse_bitmap, max_csg_slots); return; @@ -496,23 +498,19 @@ static int kbasep_csf_queue_group_debugfs_show(struct seq_file *file, { u32 gr; struct kbase_context *const kctx = file->private; - struct kbase_device *const kbdev = kctx->kbdev; + struct kbase_device *kbdev; if (WARN_ON(!kctx)) return -EINVAL; + kbdev = kctx->kbdev; + seq_printf(file, "MALI_CSF_CSG_DEBUGFS_VERSION: v%u\n", MALI_CSF_CSG_DEBUGFS_VERSION); mutex_lock(&kctx->csf.lock); kbase_csf_scheduler_lock(kbdev); - if (kbdev->csf.scheduler.state == SCHED_SLEEPING) { - /* Wait for the MCU sleep request to complete. Please refer the - * update_active_groups_status() function for the explanation. - */ - kbase_pm_wait_for_desired_state(kbdev); - } - update_active_groups_status(kbdev, file); + kbase_csf_debugfs_update_active_groups_status(kbdev); for (gr = 0; gr < MAX_QUEUE_GROUP_NUM; gr++) { struct kbase_queue_group *const group = kctx->csf.queue_groups[gr]; @@ -546,13 +544,7 @@ static int kbasep_csf_scheduler_dump_active_groups(struct seq_file *file, MALI_CSF_CSG_DEBUGFS_VERSION); kbase_csf_scheduler_lock(kbdev); - if (kbdev->csf.scheduler.state == SCHED_SLEEPING) { - /* Wait for the MCU sleep request to complete. Please refer the - * update_active_groups_status() function for the explanation. 
- */ - kbase_pm_wait_for_desired_state(kbdev); - } - update_active_groups_status(kbdev, file); + kbase_csf_debugfs_update_active_groups_status(kbdev); for (csg_nr = 0; csg_nr < num_groups; csg_nr++) { struct kbase_queue_group *const group = kbdev->csf.scheduler.csg_slots[csg_nr].resident_group; diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_csg_debugfs.h b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_csg_debugfs.h index 397e657d2cb6..16a548bf8acb 100644 --- a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_csg_debugfs.h +++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_csg_debugfs.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -44,4 +44,11 @@ void kbase_csf_queue_group_debugfs_init(struct kbase_context *kctx); */ void kbase_csf_debugfs_init(struct kbase_device *kbdev); +/** + * kbase_csf_debugfs_update_active_groups_status() - Update on-slot group statuses + * + * @kbdev: Pointer to the device + */ +void kbase_csf_debugfs_update_active_groups_status(struct kbase_device *kbdev); + #endif /* _KBASE_CSF_CSG_DEBUGFS_H_ */ diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_defs.h b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_defs.h index 32a1c557e387..e4a69cb169c3 100644 --- a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_defs.h +++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_defs.h @@ -33,6 +33,10 @@ #include "mali_kbase_csf_event.h" #include +#if IS_ENABLED(CONFIG_MALI_CORESIGHT) +#include +#endif /* IS_ENABLED(CONFIG_MALI_CORESIGHT) */ + /* Maximum number of KCPU command queues to be created per GPU address space. 
*/ #define KBASEP_MAX_KCPU_QUEUES ((size_t)256) @@ -298,9 +302,9 @@ struct kbase_csf_notification { * * @kctx: Pointer to the base context with which this GPU command queue * is associated. - * @reg: Pointer to the region allocated from the shared - * interface segment for mapping the User mode - * input/output pages in MCU firmware address space. + * @user_io_gpu_va: The start GPU VA address of this queue's userio pages. Only + * valid (i.e. not 0 ) when the queue is enabled and its owner + * group has a runtime bound csg_reg (group region). * @phys: Pointer to the physical pages allocated for the * pair or User mode input/output page * @user_io_addr: Pointer to the permanent kernel mapping of User mode @@ -376,7 +380,7 @@ struct kbase_csf_notification { */ struct kbase_queue { struct kbase_context *kctx; - struct kbase_va_region *reg; + u64 user_io_gpu_va; struct tagged_addr phys[2]; char *user_io_addr; u64 handle; @@ -421,26 +425,33 @@ struct kbase_queue { /** * struct kbase_normal_suspend_buffer - Object representing a normal * suspend buffer for queue group. - * @reg: Memory region allocated for the normal-mode suspend buffer. + * @gpu_va: The start GPU VA address of the bound suspend buffer. Note, this + * field is only valid when the owner group has a region bound at + * runtime. * @phy: Array of physical memory pages allocated for the normal- * mode suspend buffer. */ struct kbase_normal_suspend_buffer { - struct kbase_va_region *reg; + u64 gpu_va; struct tagged_addr *phy; }; /** * struct kbase_protected_suspend_buffer - Object representing a protected * suspend buffer for queue group. - * @reg: Memory region allocated for the protected-mode suspend buffer. + * @gpu_va: The start GPU VA address of the bound protected mode suspend buffer. + * Note, this field is only valid when the owner group has a region + * bound at runtime. 
* @pma: Array of pointer to protected mode allocations containing * information about memory pages allocated for protected mode * suspend buffer. + * @alloc_retries: Number of times we retried allocing physical pages + * for protected suspend buffers. */ struct kbase_protected_suspend_buffer { - struct kbase_va_region *reg; + u64 gpu_va; struct protected_memory_allocation **pma; + u8 alloc_retries; }; /** @@ -512,6 +523,13 @@ struct kbase_protected_suspend_buffer { * @deschedule_deferred_cnt: Counter keeping a track of the number of threads * that tried to deschedule the group and had to defer * the descheduling due to the dump on fault. + * @csg_reg: An opaque pointer to the runtime bound shared regions. It is + * dynamically managed by the scheduler and can be NULL if the + * group is off-slot. + * @csg_reg_bind_retries: Runtime MCU shared region map operation attempted counts. + * It is accumulated on consecutive mapping attempt failures. On + * reaching a preset limit, the group is regarded as suffered + * a fatal error and triggers a fatal error notification. */ struct kbase_queue_group { struct kbase_context *kctx; @@ -562,6 +580,8 @@ struct kbase_queue_group { #if IS_ENABLED(CONFIG_DEBUG_FS) u32 deschedule_deferred_cnt; #endif + void *csg_reg; + u8 csg_reg_bind_retries; }; /** @@ -623,6 +643,8 @@ struct kbase_csf_cpu_queue_context { * @lock: Lock preventing concurrent access to the @in_use bitmap. * @in_use: Bitmap that indicates which heap context structures are currently * allocated (in @region). + * @heap_context_size_aligned: Size of a heap context structure, in bytes, + * aligned to GPU cacheline size. * * Heap context structures are allocated by the kernel for use by the firmware. 
* The current implementation subdivides a single GPU memory region for use as @@ -634,6 +656,7 @@ struct kbase_csf_heap_context_allocator { u64 gpu_va; struct mutex lock; DECLARE_BITMAP(in_use, MAX_TILER_HEAPS); + u32 heap_context_size_aligned; }; /** @@ -874,6 +897,33 @@ struct kbase_csf_sched_heap_reclaim_mgr { atomic_t unused_pages; }; +/** + * struct kbase_csf_mcu_shared_regions - Control data for managing the MCU shared + * interface segment regions for scheduler + * operations + * + * @array_csg_regs: Base pointer of an internally created array_csg_regs[]. + * @unused_csg_regs: List contains unused csg_regs items. When an item is bound to a + * group that is placed onto on-slot by the scheduler, it is dropped + * from the list (i.e. busy active). The Scheduler will put an active + * item back when it's becoming off-slot (not in use). + * @dummy_phys: An array of dummy phys[nr_susp_pages] pages for use with normal + * and pmode suspend buffers, as a default replacement of a CSG's pages + * for the MMU mapping when the csg_reg is not bound to a group. + * @pma_phys: Pre-allocated array phy[nr_susp_pages] for transitional use with + * protected suspend buffer MMU map operations. + * @userio_mem_rd_flags: Userio input page's read access mapping configuration flags. + * @dummy_phys_allocated: Indicating the @p dummy_phys page is allocated when true. + */ +struct kbase_csf_mcu_shared_regions { + void *array_csg_regs; + struct list_head unused_csg_regs; + struct tagged_addr *dummy_phys; + struct tagged_addr *pma_phys; + unsigned long userio_mem_rd_flags; + bool dummy_phys_allocated; +}; + /** * struct kbase_csf_scheduler - Object representing the scheduler used for * CSF for an instance of GPU platform device. @@ -1008,6 +1058,9 @@ struct kbase_csf_sched_heap_reclaim_mgr { * @interrupt_lock is used to serialize the access. * @protm_enter_time: GPU protected mode enter time. * @reclaim_mgr: CSGs tiler heap manager object.
+ * @mcu_regs_data: Scheduler MCU shared regions data for managing the + * shared interface mappings for on-slot queues and + * CSG suspend buffers. */ struct kbase_csf_scheduler { struct mutex lock; @@ -1051,6 +1104,7 @@ struct kbase_csf_scheduler { u32 tick_protm_pending_seq; ktime_t protm_enter_time; struct kbase_csf_sched_heap_reclaim_mgr reclaim_mgr; + struct kbase_csf_mcu_shared_regions mcu_regs_data; }; /* @@ -1328,6 +1382,24 @@ struct kbase_csf_firmware_log { u32 func_call_list_va_end; }; +/** + * struct kbase_csf_firmware_core_dump - Object containing members for handling + * firmware core dump. + * + * @mcu_regs_addr: GPU virtual address of the start of the MCU registers buffer + * in Firmware. + * @version: Version of the FW image header core dump data format. Bits + * 7:0 specify version minor and 15:8 specify version major. + * @available: Flag to identify if the FW core dump buffer is available. + * True if entry is available in the FW image header and version + * is supported, False otherwise. + */ +struct kbase_csf_firmware_core_dump { + u32 mcu_regs_addr; + u16 version; + bool available; +}; + #if IS_ENABLED(CONFIG_DEBUG_FS) /** * struct kbase_csf_dump_on_fault - Faulty information to deliver to the daemon @@ -1458,9 +1530,9 @@ struct kbase_csf_dump_on_fault { * the glb_pwoff register. This is separated from * the @p mcu_core_pwroff_dur_count as an update * to the latter is asynchronous. - * @gpu_idle_hysteresis_ms: Sysfs attribute for the idle hysteresis time - * window in unit of ms. The firmware does not use it - * directly. + * @gpu_idle_hysteresis_us: Sysfs attribute for the idle hysteresis time + * window in unit of microseconds. The firmware does not + * use it directly. * @gpu_idle_dur_count: The counterpart of the hysteresis time window in * interface required format, ready to be used * directly in the firmware. @@ -1470,6 +1542,8 @@ struct kbase_csf_dump_on_fault { * HW counters. * @fw: Copy of the loaded MCU firmware image. 
* @fw_log: Contain members required for handling firmware log. + * @fw_core_dump: Contain members required for handling the firmware + * core dump. * @dof: Structure for dump on fault. */ struct kbase_csf_device { @@ -1507,15 +1581,22 @@ struct kbase_csf_device { u32 mcu_core_pwroff_dur_us; u32 mcu_core_pwroff_dur_count; u32 mcu_core_pwroff_reg_shadow; - u32 gpu_idle_hysteresis_ms; + u32 gpu_idle_hysteresis_us; u32 gpu_idle_dur_count; unsigned int fw_timeout_ms; struct kbase_csf_hwcnt hwcnt; struct kbase_csf_mcu_fw fw; struct kbase_csf_firmware_log fw_log; + struct kbase_csf_firmware_core_dump fw_core_dump; #if IS_ENABLED(CONFIG_DEBUG_FS) struct kbase_csf_dump_on_fault dof; #endif /* CONFIG_DEBUG_FS */ +#if IS_ENABLED(CONFIG_MALI_CORESIGHT) + /** + * @coresight: Coresight device structure. + */ + struct kbase_debug_coresight_device coresight; +#endif /* IS_ENABLED(CONFIG_MALI_CORESIGHT) */ }; /** diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_firmware.c b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_firmware.c index 1f4a4d9b6876..548657bc0a38 100644 --- a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_firmware.c +++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_firmware.c @@ -22,6 +22,7 @@ #include "mali_kbase.h" #include "mali_kbase_csf_firmware_cfg.h" #include "mali_kbase_csf_firmware_log.h" +#include "mali_kbase_csf_firmware_core_dump.h" #include "mali_kbase_csf_trace_buffer.h" #include "mali_kbase_csf_timeout.h" #include "mali_kbase_mem.h" @@ -38,7 +39,6 @@ #include "backend/gpu/mali_kbase_clk_rate_trace_mgr.h" #include #include - #include #include #include @@ -81,7 +81,7 @@ MODULE_PARM_DESC(fw_debug, #define FIRMWARE_HEADER_MAGIC (0xC3F13A6Eul) #define FIRMWARE_HEADER_VERSION_MAJOR (0ul) -#define FIRMWARE_HEADER_VERSION_MINOR (2ul) +#define FIRMWARE_HEADER_VERSION_MINOR (3ul) #define FIRMWARE_HEADER_LENGTH (0x14ul) #define CSF_FIRMWARE_ENTRY_SUPPORTED_FLAGS \ @@ -93,12 +93,13 @@ MODULE_PARM_DESC(fw_debug, CSF_FIRMWARE_ENTRY_ZERO | \ 
CSF_FIRMWARE_ENTRY_CACHE_MODE) -#define CSF_FIRMWARE_ENTRY_TYPE_INTERFACE (0) -#define CSF_FIRMWARE_ENTRY_TYPE_CONFIGURATION (1) -#define CSF_FIRMWARE_ENTRY_TYPE_TRACE_BUFFER (3) -#define CSF_FIRMWARE_ENTRY_TYPE_TIMELINE_METADATA (4) +#define CSF_FIRMWARE_ENTRY_TYPE_INTERFACE (0) +#define CSF_FIRMWARE_ENTRY_TYPE_CONFIGURATION (1) +#define CSF_FIRMWARE_ENTRY_TYPE_TRACE_BUFFER (3) +#define CSF_FIRMWARE_ENTRY_TYPE_TIMELINE_METADATA (4) #define CSF_FIRMWARE_ENTRY_TYPE_BUILD_INFO_METADATA (6) -#define CSF_FIRMWARE_ENTRY_TYPE_FUNC_CALL_LIST (7) +#define CSF_FIRMWARE_ENTRY_TYPE_FUNC_CALL_LIST (7) +#define CSF_FIRMWARE_ENTRY_TYPE_CORE_DUMP (9) #define CSF_FIRMWARE_CACHE_MODE_NONE (0ul << 3) #define CSF_FIRMWARE_CACHE_MODE_CACHED (1ul << 3) @@ -120,7 +121,6 @@ MODULE_PARM_DESC(fw_debug, (GLB_REQ_CFG_ALLOC_EN_MASK | GLB_REQ_CFG_PROGRESS_TIMER_MASK | \ GLB_REQ_CFG_PWROFF_TIMER_MASK | GLB_REQ_IDLE_ENABLE_MASK) - static inline u32 input_page_read(const u32 *const input, const u32 offset) { WARN_ON(offset % sizeof(u32)); @@ -286,6 +286,13 @@ static void boot_csf_firmware(struct kbase_device *kbdev) { kbase_csf_firmware_enable_mcu(kbdev); +#if IS_ENABLED(CONFIG_MALI_CORESIGHT) + kbase_debug_coresight_csf_state_request(kbdev, KBASE_DEBUG_CORESIGHT_CSF_ENABLED); + + if (!kbase_debug_coresight_csf_state_wait(kbdev, KBASE_DEBUG_CORESIGHT_CSF_ENABLED)) + dev_err(kbdev->dev, "Timeout waiting for CoreSight to be enabled"); +#endif /* IS_ENABLED(CONFIG_MALI_CORESIGHT) */ + wait_for_firmware_boot(kbdev); } @@ -488,6 +495,7 @@ out: * @kbdev: Kbase device structure * @virtual_start: Start of the virtual address range required for an entry allocation * @virtual_end: End of the virtual address range required for an entry allocation + * @flags: Firmware entry flags for comparison with the reusable pages found * @phys: Pointer to the array of physical (tagged) addresses making up the new * FW interface entry. 
It is an output parameter which would be made to * point to an already existing array allocated for the previously parsed @@ -508,10 +516,12 @@ out: * * Return: true if a large page can be reused, false otherwise. */ -static inline bool entry_find_large_page_to_reuse( - struct kbase_device *kbdev, const u32 virtual_start, const u32 virtual_end, - struct tagged_addr **phys, struct protected_memory_allocation ***pma, - u32 num_pages, u32 *num_pages_aligned, bool *is_small_page) +static inline bool entry_find_large_page_to_reuse(struct kbase_device *kbdev, + const u32 virtual_start, const u32 virtual_end, + const u32 flags, struct tagged_addr **phys, + struct protected_memory_allocation ***pma, + u32 num_pages, u32 *num_pages_aligned, + bool *is_small_page) { struct kbase_csf_firmware_interface *interface = NULL; struct kbase_csf_firmware_interface *target_interface = NULL; @@ -557,7 +567,7 @@ static inline bool entry_find_large_page_to_reuse( if (interface->virtual & (SZ_2M - 1)) continue; - if (virtual_diff < virtual_diff_min) { + if ((virtual_diff < virtual_diff_min) && (interface->flags == flags)) { target_interface = interface; virtual_diff_min = virtual_diff; } @@ -620,6 +630,7 @@ static int parse_memory_setup_entry(struct kbase_device *kbdev, struct protected_memory_allocation **pma = NULL; bool reuse_pages = false; bool is_small_page = true; + bool ignore_page_migration = true; if (data_end < data_start) { dev_err(kbdev->dev, "Firmware corrupt, data_end < data_start (0x%x<0x%x)\n", @@ -662,9 +673,9 @@ static int parse_memory_setup_entry(struct kbase_device *kbdev, num_pages = (virtual_end - virtual_start) >> PAGE_SHIFT; - reuse_pages = entry_find_large_page_to_reuse( - kbdev, virtual_start, virtual_end, &phys, &pma, - num_pages, &num_pages_aligned, &is_small_page); + reuse_pages = + entry_find_large_page_to_reuse(kbdev, virtual_start, virtual_end, flags, &phys, + &pma, num_pages, &num_pages_aligned, &is_small_page); if (!reuse_pages) phys = 
kmalloc_array(num_pages_aligned, sizeof(*phys), GFP_KERNEL); @@ -685,6 +696,7 @@ static int parse_memory_setup_entry(struct kbase_device *kbdev, kbase_mem_pool_group_select(kbdev, KBASE_MEM_GROUP_CSF_FW, is_small_page), num_pages_aligned, phys, false); + ignore_page_migration = false; } } @@ -794,7 +806,8 @@ static int parse_memory_setup_entry(struct kbase_device *kbdev, ret = kbase_mmu_insert_pages_no_flush(kbdev, &kbdev->csf.mcu_mmu, virtual_start >> PAGE_SHIFT, phys, num_pages_aligned, mem_flags, - KBASE_MEM_GROUP_CSF_FW, NULL); + KBASE_MEM_GROUP_CSF_FW, NULL, NULL, + ignore_page_migration); if (ret != 0) { dev_err(kbdev->dev, "Failed to insert firmware pages\n"); @@ -1023,20 +1036,26 @@ static int load_firmware_entry(struct kbase_device *kbdev, const struct kbase_cs return parse_build_info_metadata_entry(kbdev, fw, entry, size); case CSF_FIRMWARE_ENTRY_TYPE_FUNC_CALL_LIST: /* Function call list section */ - if (size < 2 * sizeof(*entry)) { + if (size < FUNC_CALL_LIST_ENTRY_NAME_OFFSET + sizeof(*entry)) { dev_err(kbdev->dev, "Function call list entry too short (size=%u)\n", size); return -EINVAL; } kbase_csf_firmware_log_parse_logging_call_list_entry(kbdev, entry); - break; - } - - if (!optional) { - dev_err(kbdev->dev, - "Unsupported non-optional entry type %u in firmware\n", - type); - return -EINVAL; + return 0; + case CSF_FIRMWARE_ENTRY_TYPE_CORE_DUMP: + /* Core Dump section */ + if (size < CORE_DUMP_ENTRY_START_ADDR_OFFSET + sizeof(*entry)) { + dev_err(kbdev->dev, "FW Core dump entry too short (size=%u)\n", size); + return -EINVAL; + } + return kbase_csf_firmware_core_dump_entry_parse(kbdev, entry); + default: + if (!optional) { + dev_err(kbdev->dev, "Unsupported non-optional entry type %u in firmware\n", + type); + return -EINVAL; + } } return 0; @@ -1687,6 +1706,71 @@ static void enable_gpu_idle_timer(struct kbase_device *const kbdev) kbdev->csf.gpu_idle_dur_count); } +static bool global_debug_request_complete(struct kbase_device *const kbdev, u32 const 
req_mask) +{ + struct kbase_csf_global_iface *global_iface = &kbdev->csf.global_iface; + bool complete = false; + unsigned long flags; + + kbase_csf_scheduler_spin_lock(kbdev, &flags); + + if ((kbase_csf_firmware_global_output(global_iface, GLB_DEBUG_ACK) & req_mask) == + (kbase_csf_firmware_global_input_read(global_iface, GLB_DEBUG_REQ) & req_mask)) + complete = true; + + kbase_csf_scheduler_spin_unlock(kbdev, flags); + + return complete; +} + +static void set_global_debug_request(const struct kbase_csf_global_iface *const global_iface, + u32 const req_mask) +{ + u32 glb_debug_req; + + kbase_csf_scheduler_spin_lock_assert_held(global_iface->kbdev); + + glb_debug_req = kbase_csf_firmware_global_output(global_iface, GLB_DEBUG_ACK); + glb_debug_req ^= req_mask; + + kbase_csf_firmware_global_input_mask(global_iface, GLB_DEBUG_REQ, glb_debug_req, req_mask); +} + +static void request_fw_core_dump( + const struct kbase_csf_global_iface *const global_iface) +{ + uint32_t run_mode = GLB_DEBUG_REQ_RUN_MODE_SET(0, GLB_DEBUG_RUN_MODE_TYPE_CORE_DUMP); + + set_global_debug_request(global_iface, GLB_DEBUG_REQ_DEBUG_RUN_MASK | run_mode); + + set_global_request(global_iface, GLB_REQ_DEBUG_CSF_REQ_MASK); +} + +int kbase_csf_firmware_req_core_dump(struct kbase_device *const kbdev) +{ + const struct kbase_csf_global_iface *const global_iface = + &kbdev->csf.global_iface; + unsigned long flags; + int ret; + + /* Serialize CORE_DUMP requests. */ + mutex_lock(&kbdev->csf.reg_lock); + + /* Update GLB_REQ with CORE_DUMP request and make firmware act on it. */ + kbase_csf_scheduler_spin_lock(kbdev, &flags); + request_fw_core_dump(global_iface); + kbase_csf_ring_doorbell(kbdev, CSF_KERNEL_DOORBELL_NR); + kbase_csf_scheduler_spin_unlock(kbdev, flags); + + /* Wait for firmware to acknowledge completion of the CORE_DUMP request. 
*/ + ret = wait_for_global_request(kbdev, GLB_REQ_DEBUG_CSF_REQ_MASK); + if (!ret) + WARN_ON(!global_debug_request_complete(kbdev, GLB_DEBUG_REQ_DEBUG_RUN_MASK)); + + mutex_unlock(&kbdev->csf.reg_lock); + + return ret; +} /** * kbasep_enable_rtu - Enable Ray Tracing Unit on powering up shader core @@ -1714,7 +1798,7 @@ static void global_init(struct kbase_device *const kbdev, u64 core_mask) GLB_ACK_IRQ_MASK_CFG_PROGRESS_TIMER_MASK | GLB_ACK_IRQ_MASK_PROTM_ENTER_MASK | GLB_ACK_IRQ_MASK_PROTM_EXIT_MASK | GLB_ACK_IRQ_MASK_FIRMWARE_CONFIG_UPDATE_MASK | GLB_ACK_IRQ_MASK_CFG_PWROFF_TIMER_MASK | GLB_ACK_IRQ_MASK_IDLE_EVENT_MASK | - GLB_ACK_IRQ_MASK_IDLE_ENABLE_MASK; + GLB_REQ_DEBUG_CSF_REQ_MASK | GLB_ACK_IRQ_MASK_IDLE_ENABLE_MASK; const struct kbase_csf_global_iface *const global_iface = &kbdev->csf.global_iface; @@ -1740,6 +1824,14 @@ static void global_init(struct kbase_device *const kbdev, u64 core_mask) kbase_csf_firmware_global_input(global_iface, GLB_ACK_IRQ_MASK, ack_irq_mask); +#if IS_ENABLED(CONFIG_MALI_CORESIGHT) + /* Enable FW MCU read/write debug interfaces */ + kbase_csf_firmware_global_input_mask( + global_iface, GLB_DEBUG_ACK_IRQ_MASK, + GLB_DEBUG_REQ_FW_AS_READ_MASK | GLB_DEBUG_REQ_FW_AS_WRITE_MASK, + GLB_DEBUG_REQ_FW_AS_READ_MASK | GLB_DEBUG_REQ_FW_AS_WRITE_MASK); +#endif /* IS_ENABLED(CONFIG_MALI_CORESIGHT) */ + kbase_csf_ring_doorbell(kbdev, CSF_KERNEL_DOORBELL_NR); kbase_csf_scheduler_spin_unlock(kbdev, flags); @@ -1890,12 +1982,12 @@ void kbase_csf_firmware_reload_completed(struct kbase_device *kbdev) kbase_pm_update_state(kbdev); } -static u32 convert_dur_to_idle_count(struct kbase_device *kbdev, const u32 dur_ms) +static u32 convert_dur_to_idle_count(struct kbase_device *kbdev, const u32 dur_us) { #define HYSTERESIS_VAL_UNIT_SHIFT (10) /* Get the cntfreq_el0 value, which drives the SYSTEM_TIMESTAMP */ u64 freq = arch_timer_get_cntfrq(); - u64 dur_val = dur_ms; + u64 dur_val = dur_us; u32 cnt_val_u32, reg_val_u32; bool src_system_timestamp = freq > 
0; @@ -1913,9 +2005,9 @@ static u32 convert_dur_to_idle_count(struct kbase_device *kbdev, const u32 dur_m "Can't get the timestamp frequency, use cycle counter format with firmware idle hysteresis!"); } - /* Formula for dur_val = ((dur_ms/1000) * freq_HZ) >> 10) */ + /* Formula for dur_val = ((dur_us/1000000) * freq_HZ) >> 10) */ dur_val = (dur_val * freq) >> HYSTERESIS_VAL_UNIT_SHIFT; - dur_val = div_u64(dur_val, 1000); + dur_val = div_u64(dur_val, 1000000); /* Interface limits the value field to S32_MAX */ cnt_val_u32 = (dur_val > S32_MAX) ? S32_MAX : (u32)dur_val; @@ -1938,7 +2030,7 @@ u32 kbase_csf_firmware_get_gpu_idle_hysteresis_time(struct kbase_device *kbdev) u32 dur; kbase_csf_scheduler_spin_lock(kbdev, &flags); - dur = kbdev->csf.gpu_idle_hysteresis_ms; + dur = kbdev->csf.gpu_idle_hysteresis_us; kbase_csf_scheduler_spin_unlock(kbdev, flags); return dur; @@ -1955,7 +2047,7 @@ u32 kbase_csf_firmware_set_gpu_idle_hysteresis_time(struct kbase_device *kbdev, mutex_lock(&kbdev->fw_load_lock); if (unlikely(!kbdev->csf.firmware_inited)) { kbase_csf_scheduler_spin_lock(kbdev, &flags); - kbdev->csf.gpu_idle_hysteresis_ms = dur; + kbdev->csf.gpu_idle_hysteresis_us = dur; kbdev->csf.gpu_idle_dur_count = hysteresis_val; kbase_csf_scheduler_spin_unlock(kbdev, flags); mutex_unlock(&kbdev->fw_load_lock); @@ -1986,7 +2078,7 @@ u32 kbase_csf_firmware_set_gpu_idle_hysteresis_time(struct kbase_device *kbdev, wait_for_global_request(kbdev, GLB_REQ_IDLE_DISABLE_MASK); kbase_csf_scheduler_spin_lock(kbdev, &flags); - kbdev->csf.gpu_idle_hysteresis_ms = dur; + kbdev->csf.gpu_idle_hysteresis_us = dur; kbdev->csf.gpu_idle_dur_count = hysteresis_val; kbase_csf_firmware_enable_gpu_idle_timer(kbdev); kbase_csf_scheduler_spin_unlock(kbdev, flags); @@ -2166,14 +2258,14 @@ void kbase_csf_firmware_early_term(struct kbase_device *kbdev) int kbase_csf_firmware_late_init(struct kbase_device *kbdev) { - kbdev->csf.gpu_idle_hysteresis_ms = FIRMWARE_IDLE_HYSTERESIS_TIME_MS; + 
kbdev->csf.gpu_idle_hysteresis_us = FIRMWARE_IDLE_HYSTERESIS_TIME_USEC; #ifdef KBASE_PM_RUNTIME if (kbase_pm_gpu_sleep_allowed(kbdev)) - kbdev->csf.gpu_idle_hysteresis_ms /= FIRMWARE_IDLE_HYSTERESIS_GPU_SLEEP_SCALER; + kbdev->csf.gpu_idle_hysteresis_us /= FIRMWARE_IDLE_HYSTERESIS_GPU_SLEEP_SCALER; #endif - WARN_ON(!kbdev->csf.gpu_idle_hysteresis_ms); + WARN_ON(!kbdev->csf.gpu_idle_hysteresis_us); kbdev->csf.gpu_idle_dur_count = - convert_dur_to_idle_count(kbdev, kbdev->csf.gpu_idle_hysteresis_ms); + convert_dur_to_idle_count(kbdev, kbdev->csf.gpu_idle_hysteresis_us); return 0; } @@ -2353,6 +2445,10 @@ int kbase_csf_firmware_load_init(struct kbase_device *kbdev) goto err_out; } +#ifdef CONFIG_MALI_FW_CORE_DUMP + kbase_csf_firmware_core_dump_init(kbdev); +#endif + /* Firmware loaded successfully, ret = 0 */ KBASE_KTRACE_ADD(kbdev, CSF_FIRMWARE_BOOT, NULL, (((u64)version_hash) << 32) | @@ -2470,6 +2566,119 @@ void kbase_csf_firmware_unload_term(struct kbase_device *kbdev) kbdev->as_free |= MCU_AS_BITMASK; } +#if IS_ENABLED(CONFIG_MALI_CORESIGHT) +int kbase_csf_firmware_mcu_register_write(struct kbase_device *const kbdev, u32 const reg_addr, + u32 const reg_val) +{ + struct kbase_csf_global_iface *global_iface = &kbdev->csf.global_iface; + unsigned long flags; + int err; + u32 glb_req; + + mutex_lock(&kbdev->csf.reg_lock); + kbase_csf_scheduler_spin_lock(kbdev, &flags); + + /* Set the address and value to write */ + kbase_csf_firmware_global_input(global_iface, GLB_DEBUG_ARG_IN0, reg_addr); + kbase_csf_firmware_global_input(global_iface, GLB_DEBUG_ARG_IN1, reg_val); + + /* Set the Global Debug request for FW MCU write */ + glb_req = kbase_csf_firmware_global_output(global_iface, GLB_DEBUG_ACK); + glb_req ^= GLB_DEBUG_REQ_FW_AS_WRITE_MASK; + kbase_csf_firmware_global_input_mask(global_iface, GLB_DEBUG_REQ, glb_req, + GLB_DEBUG_REQ_FW_AS_WRITE_MASK); + + set_global_request(global_iface, GLB_REQ_DEBUG_CSF_REQ_MASK); + + /* Notify FW about the Global Debug request */ + 
kbase_csf_ring_doorbell(kbdev, CSF_KERNEL_DOORBELL_NR); + + kbase_csf_scheduler_spin_unlock(kbdev, flags); + + err = wait_for_global_request(kbdev, GLB_REQ_DEBUG_CSF_REQ_MASK); + + mutex_unlock(&kbdev->csf.reg_lock); + + dev_dbg(kbdev->dev, "w: reg %08x val %08x", reg_addr, reg_val); + + return err; +} + +int kbase_csf_firmware_mcu_register_read(struct kbase_device *const kbdev, u32 const reg_addr, + u32 *reg_val) +{ + struct kbase_csf_global_iface *global_iface = &kbdev->csf.global_iface; + unsigned long flags; + int err; + u32 glb_req; + + if (WARN_ON(reg_val == NULL)) + return -EINVAL; + + mutex_lock(&kbdev->csf.reg_lock); + kbase_csf_scheduler_spin_lock(kbdev, &flags); + + /* Set the address to read */ + kbase_csf_firmware_global_input(global_iface, GLB_DEBUG_ARG_IN0, reg_addr); + + /* Set the Global Debug request for FW MCU read */ + glb_req = kbase_csf_firmware_global_output(global_iface, GLB_DEBUG_ACK); + glb_req ^= GLB_DEBUG_REQ_FW_AS_READ_MASK; + kbase_csf_firmware_global_input_mask(global_iface, GLB_DEBUG_REQ, glb_req, + GLB_DEBUG_REQ_FW_AS_READ_MASK); + + set_global_request(global_iface, GLB_REQ_DEBUG_CSF_REQ_MASK); + + /* Notify FW about the Global Debug request */ + kbase_csf_ring_doorbell(kbdev, CSF_KERNEL_DOORBELL_NR); + + kbase_csf_scheduler_spin_unlock(kbdev, flags); + + err = wait_for_global_request(kbdev, GLB_REQ_DEBUG_CSF_REQ_MASK); + + if (!err) { + kbase_csf_scheduler_spin_lock(kbdev, &flags); + *reg_val = kbase_csf_firmware_global_output(global_iface, GLB_DEBUG_ARG_OUT0); + kbase_csf_scheduler_spin_unlock(kbdev, flags); + } + + mutex_unlock(&kbdev->csf.reg_lock); + + dev_dbg(kbdev->dev, "r: reg %08x val %08x", reg_addr, *reg_val); + + return err; +} + +int kbase_csf_firmware_mcu_register_poll(struct kbase_device *const kbdev, u32 const reg_addr, + u32 const val_mask, u32 const reg_val) +{ + unsigned long remaining = kbase_csf_timeout_in_jiffies(kbdev->csf.fw_timeout_ms) + jiffies; + u32 read_val; + + dev_dbg(kbdev->dev, "p: reg %08x val %08x 
mask %08x", reg_addr, reg_val, val_mask); + + while (time_before(jiffies, remaining)) { + int err = kbase_csf_firmware_mcu_register_read(kbdev, reg_addr, &read_val); + + if (err) { + dev_err(kbdev->dev, + "Error reading MCU register value (read_val = %u, expect = %u)\n", + read_val, reg_val); + return err; + } + + if ((read_val & val_mask) == reg_val) + return 0; + } + + dev_err(kbdev->dev, + "Timeout waiting for MCU register value to be set (read_val = %u, expect = %u)\n", + read_val, reg_val); + + return -ETIMEDOUT; +} +#endif /* IS_ENABLED(CONFIG_MALI_CORESIGHT) */ + void kbase_csf_firmware_enable_gpu_idle_timer(struct kbase_device *kbdev) { struct kbase_csf_global_iface *global_iface = &kbdev->csf.global_iface; @@ -2848,7 +3057,7 @@ int kbase_csf_firmware_mcu_shared_mapping_init( ret = kbase_mmu_insert_pages_no_flush(kbdev, &kbdev->csf.mcu_mmu, va_reg->start_pfn, &phys[0], num_pages, gpu_map_properties, - KBASE_MEM_GROUP_CSF_FW, NULL); + KBASE_MEM_GROUP_CSF_FW, NULL, NULL, false); if (ret) goto mmu_insert_pages_error; @@ -2909,4 +3118,3 @@ void kbase_csf_firmware_mcu_shared_mapping_term( vunmap(csf_mapping->cpu_addr); kfree(csf_mapping->phys); } - diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_firmware.h b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_firmware.h index 7560a298ac9c..714a14001189 100644 --- a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_firmware.h +++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_firmware.h @@ -246,7 +246,6 @@ void kbase_csf_firmware_csg_input_mask( u32 kbase_csf_firmware_csg_output( const struct kbase_csf_cmd_stream_group_info *info, u32 offset); - /** * struct kbase_csf_global_iface - Global CSF interface * provided by the firmware. 
@@ -450,6 +449,50 @@ int kbase_csf_firmware_load_init(struct kbase_device *kbdev); */ void kbase_csf_firmware_unload_term(struct kbase_device *kbdev); +#if IS_ENABLED(CONFIG_MALI_CORESIGHT) +/** + * kbase_csf_firmware_mcu_register_write - Write to MCU register + * + * @kbdev: Instance of a gpu platform device that implements a csf interface. + * @reg_addr: Register address to write into + * @reg_val: Value to be written + * + * Write a desired value to a register in MCU address space. + * + * return: 0 on success, or negative on failure. + */ +int kbase_csf_firmware_mcu_register_write(struct kbase_device *const kbdev, u32 const reg_addr, + u32 const reg_val); +/** + * kbase_csf_firmware_mcu_register_read - Read from MCU register + * + * @kbdev: Instance of a gpu platform device that implements a csf interface. + * @reg_addr: Register address to read from + * @reg_val: Value as present in reg_addr register + * + * Read a value from MCU address space. + * + * return: 0 on success, or negative on failure. + */ +int kbase_csf_firmware_mcu_register_read(struct kbase_device *const kbdev, u32 const reg_addr, + u32 *reg_val); + +/** + * kbase_csf_firmware_mcu_register_poll - Poll MCU register + * + * @kbdev: Instance of a gpu platform device that implements a csf interface. + * @reg_addr: Register address to read from + * @val_mask: Value to mask the read value for comparison + * @reg_val: Value to be compared against + * + * Continue to read a value from MCU address space until it matches given mask and value. + * + * return: 0 on success, or negative on failure. + */ +int kbase_csf_firmware_mcu_register_poll(struct kbase_device *const kbdev, u32 const reg_addr, + u32 const val_mask, u32 const reg_val); +#endif /* IS_ENABLED(CONFIG_MALI_CORESIGHT) */ + /** * kbase_csf_firmware_ping - Send the ping request to firmware. 
* @@ -858,5 +901,16 @@ static inline u32 kbase_csf_interface_version(u32 major, u32 minor, u32 patch) */ int kbase_csf_trigger_firmware_config_update(struct kbase_device *kbdev); +/** + * kbase_csf_firmware_req_core_dump - Request a firmware core dump + * + * @kbdev: Instance of a GPU platform device that implements a CSF interface. + * + * Request a firmware core dump and wait for firmware to acknowledge. + * Firmware will enter infinite loop after the firmware core dump is created. + * + * Return: 0 if success, or negative error code on failure. + */ +int kbase_csf_firmware_req_core_dump(struct kbase_device *const kbdev); #endif diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_firmware_core_dump.c b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_firmware_core_dump.c new file mode 100644 index 000000000000..f0a10d197eec --- /dev/null +++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_firmware_core_dump.c @@ -0,0 +1,807 @@ +// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note +/* + * + * (C) COPYRIGHT 2021-2022 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html.
+ * + */ + +#include +#include +#include +#include +#include +#include + +#include "mali_kbase.h" +#include "mali_kbase_csf_firmware_core_dump.h" +#include "backend/gpu/mali_kbase_pm_internal.h" + +/* Page size in bytes in use by MCU. */ +#define FW_PAGE_SIZE 4096 + +/* + * FW image header core dump data format supported. + * Currently only version 0.1 is supported. + */ +#define FW_CORE_DUMP_DATA_VERSION_MAJOR 0 +#define FW_CORE_DUMP_DATA_VERSION_MINOR 1 + +/* Full version of the image header core dump data format */ +#define FW_CORE_DUMP_DATA_VERSION \ + ((FW_CORE_DUMP_DATA_VERSION_MAJOR << 8) | FW_CORE_DUMP_DATA_VERSION_MINOR) + +/* Validity flag to indicate if the MCU registers in the buffer are valid */ +#define FW_MCU_STATUS_MASK 0x1 +#define FW_MCU_STATUS_VALID (1 << 0) + +/* Core dump entry fields */ +#define FW_CORE_DUMP_VERSION_INDEX 0 +#define FW_CORE_DUMP_START_ADDR_INDEX 1 + +/* MCU registers stored by a firmware core dump */ +struct fw_core_dump_mcu { + u32 r0; + u32 r1; + u32 r2; + u32 r3; + u32 r4; + u32 r5; + u32 r6; + u32 r7; + u32 r8; + u32 r9; + u32 r10; + u32 r11; + u32 r12; + u32 sp; + u32 lr; + u32 pc; +}; + +/* Any ELF definitions used in this file are from elf.h/elfcore.h except + * when specific 32-bit versions are required (mainly for the + * ELF_PRSTATUS32 note that is used to contain the MCU registers). + */ + +/* - 32-bit version of timeval structures used in ELF32 PRSTATUS note. */ +struct prstatus32_timeval { + int tv_sec; + int tv_usec; +}; + +/* - Structure defining ELF32 PRSTATUS note contents, as defined by the + * GNU binutils BFD library used by GDB, in bfd/hosts/x86-64linux.h. + * Note: GDB checks for the size of this structure to be 0x94. + * Modified pr_reg (array containing the Arm 32-bit MCU registers) to + * use u32[18] instead of elf_gregset32_t to prevent introducing new typedefs. + */ +struct elf_prstatus32 { + struct elf_siginfo pr_info; /* Info associated with signal. */ + short int pr_cursig; /* Current signal. 
*/ + unsigned int pr_sigpend; /* Set of pending signals. */ + unsigned int pr_sighold; /* Set of held signals. */ + pid_t pr_pid; + pid_t pr_ppid; + pid_t pr_pgrp; + pid_t pr_sid; + struct prstatus32_timeval pr_utime; /* User time. */ + struct prstatus32_timeval pr_stime; /* System time. */ + struct prstatus32_timeval pr_cutime; /* Cumulative user time. */ + struct prstatus32_timeval pr_cstime; /* Cumulative system time. */ + u32 pr_reg[18]; /* GP registers. */ + int pr_fpvalid; /* True if math copro being used. */ +}; + +/** + * struct fw_core_dump_data - Context for seq_file operations used on 'fw_core_dump' + * debugfs file. + * @kbdev: Instance of a GPU platform device that implements a CSF interface. + */ +struct fw_core_dump_data { + struct kbase_device *kbdev; +}; + +/* + * struct fw_core_dump_seq_off - Iterator for seq_file operations used on 'fw_core_dump' + * debugfs file. + * @interface: current firmware memory interface + * @page_num: current page number (0..) within @interface + */ +struct fw_core_dump_seq_off { + struct kbase_csf_firmware_interface *interface; + u32 page_num; +}; + +/** + * fw_get_core_dump_mcu - Get the MCU registers saved by a firmware core dump + * + * @kbdev: Instance of a GPU platform device that implements a CSF interface. + * @regs: Pointer to a core dump mcu struct where the MCU registers are copied + * to. Should be allocated by the caller. + * + * Return: 0 if successfully copied the MCU registers, negative error code otherwise.
+ */ +static int fw_get_core_dump_mcu(struct kbase_device *kbdev, struct fw_core_dump_mcu *regs) +{ + unsigned int i; + u32 status = 0; + u32 data_addr = kbdev->csf.fw_core_dump.mcu_regs_addr; + u32 *data = (u32 *)regs; + + /* Check if the core dump entry exposed the buffer */ + if (!regs || !kbdev->csf.fw_core_dump.available) + return -EPERM; + + /* Check if the data in the buffer is valid, if not, return error */ + kbase_csf_read_firmware_memory(kbdev, data_addr, &status); + if ((status & FW_MCU_STATUS_MASK) != FW_MCU_STATUS_VALID) + return -EPERM; + + /* According to image header documentation, the MCU registers core dump + * buffer is 32-bit aligned. + */ + for (i = 1; i <= sizeof(struct fw_core_dump_mcu) / sizeof(u32); ++i) + kbase_csf_read_firmware_memory(kbdev, data_addr + i * sizeof(u32), &data[i - 1]); + + return 0; +} + +/** + * fw_core_dump_fill_elf_header - Initializes an ELF32 header + * @hdr: ELF32 header to initialize + * @sections: Number of entries in the ELF program header table + * + * Initializes an ELF32 header for an ARM 32-bit little-endian + * 'Core file' object file. + */ +static void fw_core_dump_fill_elf_header(struct elf32_hdr *hdr, unsigned int sections) +{ + /* Reset all members in header. */ + memset(hdr, 0, sizeof(*hdr)); + + /* Magic number identifying file as an ELF object. */ + memcpy(hdr->e_ident, ELFMAG, SELFMAG); + + /* Identify file as 32-bit, little-endian, using current + * ELF header version, with no OS or ABI specific ELF + * extensions used. + */ + hdr->e_ident[EI_CLASS] = ELFCLASS32; + hdr->e_ident[EI_DATA] = ELFDATA2LSB; + hdr->e_ident[EI_VERSION] = EV_CURRENT; + hdr->e_ident[EI_OSABI] = ELFOSABI_NONE; + + /* 'Core file' type of object file. */ + hdr->e_type = ET_CORE; + + /* ARM 32-bit architecture (AARCH32) */ + hdr->e_machine = EM_ARM; + + /* Object file version: the original format. */ + hdr->e_version = EV_CURRENT; + + /* Offset of program header table in file. 
*/ + hdr->e_phoff = sizeof(struct elf32_hdr); + + /* No processor specific flags. */ + hdr->e_flags = 0; + + /* Size of the ELF header in bytes. */ + hdr->e_ehsize = sizeof(struct elf32_hdr); + + /* Size of the ELF program header entry in bytes. */ + hdr->e_phentsize = sizeof(struct elf32_phdr); + + /* Number of entries in the program header table. */ + hdr->e_phnum = sections; +} + +/** + * fw_core_dump_fill_elf_program_header_note - Initializes an ELF32 program header + * for holding auxiliary information + * @phdr: ELF32 program header + * @file_offset: Location of the note in the file in bytes + * @size: Size of the note in bytes. + * + * Initializes an ELF32 program header describing auxiliary information (containing + * one or more notes) of @size bytes altogether located in the file at offset + * @file_offset. + */ +static void fw_core_dump_fill_elf_program_header_note(struct elf32_phdr *phdr, u32 file_offset, + u32 size) +{ + /* Auxiliary information (note) in program header. */ + phdr->p_type = PT_NOTE; + + /* Location of first note in file in bytes. */ + phdr->p_offset = file_offset; + + /* Size of all notes combined in bytes. */ + phdr->p_filesz = size; + + /* Other members not relevant for a note. */ + phdr->p_vaddr = 0; + phdr->p_paddr = 0; + phdr->p_memsz = 0; + phdr->p_align = 0; + phdr->p_flags = 0; +} + +/** + * fw_core_dump_fill_elf_program_header - Initializes an ELF32 program header for a loadable segment + * @phdr: ELF32 program header to initialize. + * @file_offset: Location of loadable segment in file in bytes + * (aligned to FW_PAGE_SIZE bytes) + * @vaddr: 32-bit virtual address where to write the segment + * (aligned to FW_PAGE_SIZE bytes) + * @size: Size of the segment in bytes. + * @flags: CSF_FIRMWARE_ENTRY_* flags describing access permissions.
+ * + * Initializes an ELF32 program header describing a loadable segment of + * @size bytes located in the file at offset @file_offset to be loaded + * at virtual address @vaddr with access permissions as described by + * CSF_FIRMWARE_ENTRY_* flags in @flags. + */ +static void fw_core_dump_fill_elf_program_header(struct elf32_phdr *phdr, u32 file_offset, + u32 vaddr, u32 size, u32 flags) +{ + /* Loadable segment in program header. */ + phdr->p_type = PT_LOAD; + + /* Location of segment in file in bytes. Aligned to p_align bytes. */ + phdr->p_offset = file_offset; + + /* Virtual address of segment. Aligned to p_align bytes. */ + phdr->p_vaddr = vaddr; + + /* Physical address of segment. Not relevant. */ + phdr->p_paddr = 0; + + /* Size of segment in file and memory. */ + phdr->p_filesz = size; + phdr->p_memsz = size; + + /* Alignment of segment in the file and memory in bytes (integral power of 2). */ + phdr->p_align = FW_PAGE_SIZE; + + /* Set segment access permissions. */ + phdr->p_flags = 0; + if (flags & CSF_FIRMWARE_ENTRY_READ) + phdr->p_flags |= PF_R; + if (flags & CSF_FIRMWARE_ENTRY_WRITE) + phdr->p_flags |= PF_W; + if (flags & CSF_FIRMWARE_ENTRY_EXECUTE) + phdr->p_flags |= PF_X; +} + +/** + * fw_core_dump_get_prstatus_note_size - Calculates size of a ELF32 PRSTATUS note + * @name: Name given to the PRSTATUS note. + * + * Calculates the size of a 32-bit PRSTATUS note (which contains information + * about a process like the current MCU registers) taking into account + * @name must be padded to a 4-byte multiple. + * + * Return: size of 32-bit PRSTATUS note in bytes. 
+ */ +static unsigned int fw_core_dump_get_prstatus_note_size(char *name) +{ + return sizeof(struct elf32_note) + roundup(strlen(name) + 1, 4) + + sizeof(struct elf_prstatus32); +} + +/** + * fw_core_dump_fill_elf_prstatus - Initializes an ELF32 PRSTATUS structure + * @prs: ELF32 PRSTATUS note to initialize + * @regs: MCU registers to copy into the PRSTATUS note + * + * Initializes an ELF32 PRSTATUS structure with MCU registers @regs. + * Other process information is N/A for CSF Firmware. + */ +static void fw_core_dump_fill_elf_prstatus(struct elf_prstatus32 *prs, + struct fw_core_dump_mcu *regs) +{ + /* Only fill in registers (32-bit) of PRSTATUS note. */ + memset(prs, 0, sizeof(*prs)); + prs->pr_reg[0] = regs->r0; + prs->pr_reg[1] = regs->r1; + prs->pr_reg[2] = regs->r2; + prs->pr_reg[3] = regs->r3; + prs->pr_reg[4] = regs->r4; + prs->pr_reg[5] = regs->r5; + prs->pr_reg[6] = regs->r6; + prs->pr_reg[7] = regs->r7; + prs->pr_reg[8] = regs->r8; + prs->pr_reg[9] = regs->r9; + prs->pr_reg[10] = regs->r10; + prs->pr_reg[11] = regs->r11; + prs->pr_reg[12] = regs->r12; + prs->pr_reg[13] = regs->sp; + prs->pr_reg[14] = regs->lr; + prs->pr_reg[15] = regs->pc; +} + +/** + * fw_core_dump_create_prstatus_note - Creates an ELF32 PRSTATUS note + * @name: Name for the PRSTATUS note + * @prs: ELF32 PRSTATUS structure to put in the PRSTATUS note + * @created_prstatus_note: + * Pointer to the allocated ELF32 PRSTATUS note + * + * Creates an ELF32 note with one PRSTATUS entry containing the + * ELF32 PRSTATUS structure @prs. Caller needs to free the created note in + * @created_prstatus_note. + * + * Return: 0 on failure, otherwise size of ELF32 PRSTATUS note in bytes. + */ +static unsigned int fw_core_dump_create_prstatus_note(char *name, struct elf_prstatus32 *prs, + struct elf32_note **created_prstatus_note) +{ + struct elf32_note *note; + unsigned int note_name_sz; + unsigned int note_sz; + + /* Allocate memory for ELF32 note containing a PRSTATUS note.
*/ + note_name_sz = strlen(name) + 1; + note_sz = sizeof(struct elf32_note) + roundup(note_name_sz, 4) + + sizeof(struct elf_prstatus32); + note = kmalloc(note_sz, GFP_KERNEL); + if (!note) + return 0; + + /* Fill in ELF32 note with one entry for a PRSTATUS note. */ + note->n_namesz = note_name_sz; + note->n_descsz = sizeof(struct elf_prstatus32); + note->n_type = NT_PRSTATUS; + memcpy(note + 1, name, note_name_sz); + memcpy((char *)(note + 1) + roundup(note_name_sz, 4), prs, sizeof(*prs)); + + /* Return pointer and size of the created ELF32 note. */ + *created_prstatus_note = note; + return note_sz; +} + +/** + * fw_core_dump_write_elf_header - Writes ELF header for the FW core dump + * @m: the seq_file handle + * + * Writes the ELF header of the core dump including program headers for + * memory sections and a note containing the current MCU register + * values. + * + * Excludes memory sections without read access permissions or + * are for protected memory. + * + * The data written is as follows: + * - ELF header + * - ELF PHDRs for memory sections + * - ELF PHDR for program header NOTE + * - ELF PRSTATUS note + * - 0-bytes padding to multiple of ELF_EXEC_PAGESIZE + * + * The actual memory section dumps should follow this (not written + * by this function). + * + * Retrieves the necessary information via the struct + * fw_core_dump_data stored in the private member of the seq_file + * handle. 
+ * + * Return: + * * 0 - success + * * -ENOMEM - not enough memory for allocating ELF32 note + */ +static int fw_core_dump_write_elf_header(struct seq_file *m) +{ + struct elf32_hdr hdr; + struct elf32_phdr phdr; + struct fw_core_dump_data *dump_data = m->private; + struct kbase_device *const kbdev = dump_data->kbdev; + struct kbase_csf_firmware_interface *interface; + struct elf_prstatus32 elf_prs; + struct elf32_note *elf_prstatus_note; + unsigned int sections = 0; + unsigned int elf_prstatus_note_size; + u32 elf_prstatus_offset; + u32 elf_phdr_note_offset; + u32 elf_memory_sections_data_offset; + u32 total_pages = 0; + u32 padding_size, *padding; + struct fw_core_dump_mcu regs = { 0 }; + + /* Count number of memory sections. */ + list_for_each_entry(interface, &kbdev->csf.firmware_interfaces, node) { + /* Skip memory sections that cannot be read or are protected. */ + if ((interface->flags & CSF_FIRMWARE_ENTRY_PROTECTED) || + (interface->flags & CSF_FIRMWARE_ENTRY_READ) == 0) + continue; + sections++; + } + + /* Prepare ELF header. */ + fw_core_dump_fill_elf_header(&hdr, sections + 1); + seq_write(m, &hdr, sizeof(struct elf32_hdr)); + + elf_prstatus_note_size = fw_core_dump_get_prstatus_note_size("CORE"); + /* PHDRs of PT_LOAD type. */ + elf_phdr_note_offset = sizeof(struct elf32_hdr) + sections * sizeof(struct elf32_phdr); + /* PHDR of PT_NOTE type. */ + elf_prstatus_offset = elf_phdr_note_offset + sizeof(struct elf32_phdr); + elf_memory_sections_data_offset = elf_prstatus_offset + elf_prstatus_note_size; + + /* Calculate padding size to page offset. */ + padding_size = roundup(elf_memory_sections_data_offset, ELF_EXEC_PAGESIZE) - + elf_memory_sections_data_offset; + elf_memory_sections_data_offset += padding_size; + + /* Prepare ELF program header table. */ + list_for_each_entry(interface, &kbdev->csf.firmware_interfaces, node) { + /* Skip memory sections that cannot be read or are protected. 
*/ + if ((interface->flags & CSF_FIRMWARE_ENTRY_PROTECTED) || + (interface->flags & CSF_FIRMWARE_ENTRY_READ) == 0) + continue; + + fw_core_dump_fill_elf_program_header(&phdr, elf_memory_sections_data_offset, + interface->virtual, + interface->num_pages * FW_PAGE_SIZE, + interface->flags); + + seq_write(m, &phdr, sizeof(struct elf32_phdr)); + + elf_memory_sections_data_offset += interface->num_pages * FW_PAGE_SIZE; + total_pages += interface->num_pages; + } + + /* Prepare PHDR of PT_NOTE type. */ + fw_core_dump_fill_elf_program_header_note(&phdr, elf_prstatus_offset, + elf_prstatus_note_size); + seq_write(m, &phdr, sizeof(struct elf32_phdr)); + + /* Prepare ELF note of PRSTATUS type. */ + if (fw_get_core_dump_mcu(kbdev, ®s)) + dev_dbg(kbdev->dev, "MCU Registers not available, all registers set to zero"); + /* Even if MCU Registers are not available the ELF prstatus is still + * filled with the registers equal to zero. + */ + fw_core_dump_fill_elf_prstatus(&elf_prs, ®s); + elf_prstatus_note_size = + fw_core_dump_create_prstatus_note("CORE", &elf_prs, &elf_prstatus_note); + if (elf_prstatus_note_size == 0) + return -ENOMEM; + + seq_write(m, elf_prstatus_note, elf_prstatus_note_size); + kfree(elf_prstatus_note); + + /* Pad file to page size. */ + padding = kzalloc(padding_size, GFP_KERNEL); + seq_write(m, padding, padding_size); + kfree(padding); + + return 0; +} + +/** + * fw_core_dump_create - Requests firmware to save state for a firmware core dump + * @kbdev: Instance of a GPU platform device that implements a CSF interface. + * + * Return: 0 on success, error code otherwise. + */ +static int fw_core_dump_create(struct kbase_device *kbdev) +{ + int err; + + /* Ensure MCU is active before requesting the core dump. 
*/ + kbase_csf_scheduler_pm_active(kbdev); + err = kbase_csf_scheduler_wait_mcu_active(kbdev); + if (!err) + err = kbase_csf_firmware_req_core_dump(kbdev); + + kbase_csf_scheduler_pm_idle(kbdev); + + return err; +} + +/** + * fw_core_dump_seq_start - seq_file start operation for firmware core dump file + * @m: the seq_file handle + * @_pos: holds the current position in pages + * (0 or most recent position used in previous session) + * + * Starts a seq_file session, positioning the iterator for the session to page @_pos - 1 + * within the firmware interface memory sections. @_pos value 0 is used to indicate the + * position of the ELF header at the start of the file. + * + * Retrieves the necessary information via the struct fw_core_dump_data stored in + * the private member of the seq_file handle. + * + * Return: + * * iterator pointer - pointer to iterator struct fw_core_dump_seq_off + * * SEQ_START_TOKEN - special iterator pointer indicating it is the start of the file + * * NULL - iterator could not be allocated + */ +static void *fw_core_dump_seq_start(struct seq_file *m, loff_t *_pos) +{ + struct fw_core_dump_data *dump_data = m->private; + struct fw_core_dump_seq_off *data; + struct kbase_csf_firmware_interface *interface; + loff_t pos = *_pos; + + if (pos == 0) + return SEQ_START_TOKEN; + + /* Move iterator in the right position based on page number within + * available pages of firmware interface memory sections. + */ + pos--; /* ignore start token */ + list_for_each_entry(interface, &dump_data->kbdev->csf.firmware_interfaces, node) { + /* Skip memory sections that cannot be read or are protected. 
*/ + if ((interface->flags & CSF_FIRMWARE_ENTRY_PROTECTED) || + (interface->flags & CSF_FIRMWARE_ENTRY_READ) == 0) + continue; + + if (pos >= interface->num_pages) { + pos -= interface->num_pages; + } else { + data = kmalloc(sizeof(*data), GFP_KERNEL); + if (!data) + return NULL; + data->interface = interface; + data->page_num = pos; + return data; + } + } + + return NULL; +} + +/** + * fw_core_dump_seq_stop - seq_file stop operation for firmware core dump file + * @m: the seq_file handle + * @v: the current iterator (pointer to struct fw_core_dump_seq_off) + * + * Closes the current session and frees any memory related. + */ +static void fw_core_dump_seq_stop(struct seq_file *m, void *v) +{ + kfree(v); +} + +/** + * fw_core_dump_seq_next - seq_file next operation for firmware core dump file + * @m: the seq_file handle + * @v: the current iterator (pointer to struct fw_core_dump_seq_off) + * @pos: holds the current position in pages + * (0 or most recent position used in previous session) + * + * Moves the iterator @v forward to the next page within the firmware interface + * memory sections and returns the updated position in @pos. + * @v value SEQ_START_TOKEN indicates the ELF header position. + * + * Return: + * * iterator pointer - pointer to iterator struct fw_core_dump_seq_off + * * NULL - iterator could not be allocated + */ +static void *fw_core_dump_seq_next(struct seq_file *m, void *v, loff_t *pos) +{ + struct fw_core_dump_data *dump_data = m->private; + struct fw_core_dump_seq_off *data = v; + struct kbase_csf_firmware_interface *interface; + struct list_head *interfaces = &dump_data->kbdev->csf.firmware_interfaces; + + /* Is current position at the ELF header ? */ + if (v == SEQ_START_TOKEN) { + if (list_empty(interfaces)) + return NULL; + + /* Prepare iterator for starting at first page in firmware interface + * memory sections. 
+ */ + data = kmalloc(sizeof(*data), GFP_KERNEL); + if (!data) + return NULL; + data->interface = + list_first_entry(interfaces, struct kbase_csf_firmware_interface, node); + data->page_num = 0; + ++*pos; + return data; + } + + /* First attempt to satisfy from current firmware interface memory section. */ + interface = data->interface; + if (data->page_num + 1 < interface->num_pages) { + data->page_num++; + ++*pos; + return data; + } + + /* Need next firmware interface memory section. This could be the last one. */ + if (list_is_last(&interface->node, interfaces)) { + kfree(data); + return NULL; + } + + /* Move to first page in next firmware interface memory section. */ + data->interface = list_next_entry(interface, node); + data->page_num = 0; + ++*pos; + + return data; +} + +/** + * fw_core_dump_seq_show - seq_file show operation for firmware core dump file + * @m: the seq_file handle + * @v: the current iterator (pointer to struct fw_core_dump_seq_off) + * + * Writes the current page in a firmware interface memory section indicated + * by the iterator @v to the file. If @v is SEQ_START_TOKEN the ELF + * header is written. + * + * Return: 0 on success, error code otherwise. + */ +static int fw_core_dump_seq_show(struct seq_file *m, void *v) +{ + struct fw_core_dump_seq_off *data = v; + struct page *page; + u32 *p; + + /* Either write the ELF header or current page. */ + if (v == SEQ_START_TOKEN) + return fw_core_dump_write_elf_header(m); + + /* Write the current page. */ + page = as_page(data->interface->phys[data->page_num]); + p = kmap_atomic(page); + seq_write(m, p, FW_PAGE_SIZE); + kunmap_atomic(p); + + return 0; +} + +/* Sequence file operations for firmware core dump file. 
*/ +static const struct seq_operations fw_core_dump_seq_ops = { + .start = fw_core_dump_seq_start, + .next = fw_core_dump_seq_next, + .stop = fw_core_dump_seq_stop, + .show = fw_core_dump_seq_show, +}; + +/** + * fw_core_dump_debugfs_open - callback for opening the 'fw_core_dump' debugfs file + * @inode: inode of the file + * @file: file pointer + * + * Prepares for servicing a write request to request a core dump from firmware and + * a read request to retrieve the core dump. + * + * Returns an error if the firmware is not initialized yet. + * + * Return: 0 on success, error code otherwise. + */ +static int fw_core_dump_debugfs_open(struct inode *inode, struct file *file) +{ + struct kbase_device *const kbdev = inode->i_private; + struct fw_core_dump_data *dump_data; + int ret; + + /* Fail if firmware is not initialized yet. */ + if (!kbdev->csf.firmware_inited) { + ret = -ENODEV; + goto open_fail; + } + + /* Open a sequence file for iterating through the pages in the + * firmware interface memory pages. seq_open stores a + * struct seq_file * in the private_data field of @file. + */ + ret = seq_open(file, &fw_core_dump_seq_ops); + if (ret) + goto open_fail; + + /* Allocate a context for sequence file operations. */ + dump_data = kmalloc(sizeof(*dump_data), GFP_KERNEL); + if (!dump_data) { + ret = -ENOMEM; + goto out; + } + + /* Kbase device will be shared with sequence file operations. */ + dump_data->kbdev = kbdev; + + /* Link our sequence file context. 
*/ + ((struct seq_file *)file->private_data)->private = dump_data; + + return 0; +out: + seq_release(inode, file); +open_fail: + return ret; +} + +/** + * fw_core_dump_debugfs_write - callback for a write to the 'fw_core_dump' debugfs file + * @file: file pointer + * @ubuf: user buffer containing data to store + * @count: number of bytes in user buffer + * @ppos: file position + * + * Any data written to the file triggers a firmware core dump request which + * subsequently can be retrieved by reading from the file. + * + * Return: @count if the function succeeded. An error code on failure. + */ +static ssize_t fw_core_dump_debugfs_write(struct file *file, const char __user *ubuf, size_t count, + loff_t *ppos) +{ + int err; + struct fw_core_dump_data *dump_data = ((struct seq_file *)file->private_data)->private; + struct kbase_device *const kbdev = dump_data->kbdev; + + CSTD_UNUSED(ppos); + + err = fw_core_dump_create(kbdev); + + return err ? err : count; +} + +/** + * fw_core_dump_debugfs_release - callback for releasing the 'fw_core_dump' debugfs file + * @inode: inode of the file + * @file: file pointer + * + * Return: 0 on success, error code otherwise. + */ +static int fw_core_dump_debugfs_release(struct inode *inode, struct file *file) +{ + struct fw_core_dump_data *dump_data = ((struct seq_file *)file->private_data)->private; + + seq_release(inode, file); + + kfree(dump_data); + + return 0; +} +/* Debugfs file operations for firmware core dump file. 
*/ +static const struct file_operations kbase_csf_fw_core_dump_fops = { + .owner = THIS_MODULE, + .open = fw_core_dump_debugfs_open, + .read = seq_read, + .write = fw_core_dump_debugfs_write, + .llseek = seq_lseek, + .release = fw_core_dump_debugfs_release, +}; + +void kbase_csf_firmware_core_dump_init(struct kbase_device *const kbdev) +{ +#if IS_ENABLED(CONFIG_DEBUG_FS) + debugfs_create_file("fw_core_dump", 0600, kbdev->mali_debugfs_directory, kbdev, + &kbase_csf_fw_core_dump_fops); +#endif /* CONFIG_DEBUG_FS */ +} + +int kbase_csf_firmware_core_dump_entry_parse(struct kbase_device *kbdev, const u32 *entry) +{ + /* Casting to u16 as version is defined by bits 15:0 */ + kbdev->csf.fw_core_dump.version = (u16)entry[FW_CORE_DUMP_VERSION_INDEX]; + + if (kbdev->csf.fw_core_dump.version != FW_CORE_DUMP_DATA_VERSION) + return -EPERM; + + kbdev->csf.fw_core_dump.mcu_regs_addr = entry[FW_CORE_DUMP_START_ADDR_INDEX]; + kbdev->csf.fw_core_dump.available = true; + + return 0; +} diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_firmware_core_dump.h b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_firmware_core_dump.h new file mode 100644 index 000000000000..0537dca4f37f --- /dev/null +++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_firmware_core_dump.h @@ -0,0 +1,65 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * + * (C) COPYRIGHT 2021-2022 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + */ + +#ifndef _KBASE_CSF_FIRMWARE_CORE_DUMP_H_ +#define _KBASE_CSF_FIRMWARE_CORE_DUMP_H_ + +struct kbase_device; + +/** Offset of the last field of core dump entry from the image header */ +#define CORE_DUMP_ENTRY_START_ADDR_OFFSET (0x4) + +/** + * kbase_csf_firmware_core_dump_entry_parse() - Parse a "core dump" entry from + * the image header. + * + * @kbdev: Instance of a GPU platform device that implements a CSF interface. + * @entry: Pointer to section. + * + * Read a "core dump" entry from the image header, check the version for + * compatibility and store the address pointer. + * + * Return: 0 if successfully parse entry, negative error code otherwise. + */ +int kbase_csf_firmware_core_dump_entry_parse(struct kbase_device *kbdev, const u32 *entry); + +/** + * kbase_csf_firmware_core_dump_init() - Initialize firmware core dump support + * + * @kbdev: Instance of a GPU platform device that implements a CSF interface. + * Must be zero-initialized. + * + * Creates the fw_core_dump debugfs file through which to request a firmware + * core dump. The created debugfs file is cleaned up as part of kbdev debugfs + * cleanup. 
+ * + * The fw_core_dump debugfs file can be used in the following way: + * + * To explicitly request core dump: + * echo 1 >/sys/kernel/debug/mali0/fw_core_dump + * + * To output current core dump (after explicitly requesting a core dump, or + * kernel driver reported an internal firmware error): + * cat /sys/kernel/debug/mali0/fw_core_dump + */ +void kbase_csf_firmware_core_dump_init(struct kbase_device *const kbdev); + +#endif /* _KBASE_CSF_FIRMWARE_CORE_DUMP_H_ */ diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_firmware_log.c b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_firmware_log.c index 20d8c0d4fdb1..6e0d3c2f5071 100644 --- a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_firmware_log.c +++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_firmware_log.c @@ -85,7 +85,7 @@ static int kbase_csf_firmware_log_enable_mask_write(void *data, u64 val) dev_dbg(kbdev->dev, "Limit enabled bits count from %u to 64", enable_bits_count); enable_bits_count = 64; } - new_mask = val & ((1 << enable_bits_count) - 1); + new_mask = val & (UINT64_MAX >> (64 - enable_bits_count)); if (new_mask != kbase_csf_firmware_trace_buffer_get_active_mask64(tb)) return kbase_csf_firmware_trace_buffer_set_active_mask64(tb, new_mask); @@ -353,7 +353,7 @@ static void toggle_logging_calls_in_loaded_image(struct kbase_device *kbdev, boo diff = callee_address - calling_address - 4; sign = !!(diff & 0x80000000); - if (ARMV7_T1_BL_IMM_RANGE_MIN > (int32_t)diff && + if (ARMV7_T1_BL_IMM_RANGE_MIN > (int32_t)diff || ARMV7_T1_BL_IMM_RANGE_MAX < (int32_t)diff) { dev_warn(kbdev->dev, "FW log patch 0x%x out of range, skipping", calling_address); diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_firmware_log.h b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_firmware_log.h index 8d7a2210a457..1008320464a9 100644 --- a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_firmware_log.h +++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_firmware_log.h @@ -24,6 +24,9 @@ #include +/** Offset of the last field of 
functions call list entry from the image header */ +#define FUNC_CALL_LIST_ENTRY_NAME_OFFSET (0x8) + /* * Firmware log dumping buffer size. */ diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_firmware_no_mali.c b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_firmware_no_mali.c index f414d8894306..ab25ed4429e3 100644 --- a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_firmware_no_mali.c +++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_firmware_no_mali.c @@ -32,7 +32,8 @@ #include "mali_kbase_csf_scheduler.h" #include "mmu/mali_kbase_mmu.h" #include "backend/gpu/mali_kbase_clk_rate_trace_mgr.h" -#include +#include +#include #include #include @@ -104,7 +105,6 @@ struct dummy_firmware_interface { (GLB_REQ_CFG_ALLOC_EN_MASK | GLB_REQ_CFG_PROGRESS_TIMER_MASK | \ GLB_REQ_CFG_PWROFF_TIMER_MASK | GLB_REQ_IDLE_ENABLE_MASK) - static inline u32 input_page_read(const u32 *const input, const u32 offset) { WARN_ON(offset % sizeof(u32)); @@ -716,6 +716,71 @@ static void enable_gpu_idle_timer(struct kbase_device *const kbdev) kbdev->csf.gpu_idle_dur_count); } +static bool global_debug_request_complete(struct kbase_device *const kbdev, u32 const req_mask) +{ + struct kbase_csf_global_iface *global_iface = &kbdev->csf.global_iface; + bool complete = false; + unsigned long flags; + + kbase_csf_scheduler_spin_lock(kbdev, &flags); + + if ((kbase_csf_firmware_global_output(global_iface, GLB_DEBUG_ACK) & req_mask) == + (kbase_csf_firmware_global_input_read(global_iface, GLB_DEBUG_REQ) & req_mask)) + complete = true; + + kbase_csf_scheduler_spin_unlock(kbdev, flags); + + return complete; +} + +static void set_global_debug_request(const struct kbase_csf_global_iface *const global_iface, + u32 const req_mask) +{ + u32 glb_debug_req; + + kbase_csf_scheduler_spin_lock_assert_held(global_iface->kbdev); + + glb_debug_req = kbase_csf_firmware_global_output(global_iface, GLB_DEBUG_ACK); + glb_debug_req ^= req_mask; + + kbase_csf_firmware_global_input_mask(global_iface, GLB_DEBUG_REQ, 
glb_debug_req, req_mask); +} + +static void request_fw_core_dump( + const struct kbase_csf_global_iface *const global_iface) +{ + uint32_t run_mode = GLB_DEBUG_REQ_RUN_MODE_SET(0, GLB_DEBUG_RUN_MODE_TYPE_CORE_DUMP); + + set_global_debug_request(global_iface, GLB_DEBUG_REQ_DEBUG_RUN_MASK | run_mode); + + set_global_request(global_iface, GLB_REQ_DEBUG_CSF_REQ_MASK); +} + +int kbase_csf_firmware_req_core_dump(struct kbase_device *const kbdev) +{ + const struct kbase_csf_global_iface *const global_iface = + &kbdev->csf.global_iface; + unsigned long flags; + int ret; + + /* Serialize CORE_DUMP requests. */ + mutex_lock(&kbdev->csf.reg_lock); + + /* Update GLB_REQ with CORE_DUMP request and make firmware act on it. */ + kbase_csf_scheduler_spin_lock(kbdev, &flags); + request_fw_core_dump(global_iface); + kbase_csf_ring_doorbell(kbdev, CSF_KERNEL_DOORBELL_NR); + kbase_csf_scheduler_spin_unlock(kbdev, flags); + + /* Wait for firmware to acknowledge completion of the CORE_DUMP request. */ + ret = wait_for_global_request(kbdev, GLB_REQ_DEBUG_CSF_REQ_MASK); + if (!ret) + WARN_ON(!global_debug_request_complete(kbdev, GLB_DEBUG_REQ_DEBUG_RUN_MASK)); + + mutex_unlock(&kbdev->csf.reg_lock); + + return ret; +} static void global_init(struct kbase_device *const kbdev, u64 core_mask) { @@ -724,8 +789,7 @@ static void global_init(struct kbase_device *const kbdev, u64 core_mask) GLB_ACK_IRQ_MASK_CFG_PROGRESS_TIMER_MASK | GLB_ACK_IRQ_MASK_PROTM_ENTER_MASK | GLB_ACK_IRQ_MASK_PROTM_EXIT_MASK | GLB_ACK_IRQ_MASK_FIRMWARE_CONFIG_UPDATE_MASK | GLB_ACK_IRQ_MASK_CFG_PWROFF_TIMER_MASK | GLB_ACK_IRQ_MASK_IDLE_EVENT_MASK | - GLB_ACK_IRQ_MASK_IDLE_ENABLE_MASK | - 0; + GLB_ACK_IRQ_MASK_IDLE_ENABLE_MASK | GLB_REQ_DEBUG_CSF_REQ_MASK; const struct kbase_csf_global_iface *const global_iface = &kbdev->csf.global_iface; @@ -917,7 +981,7 @@ u32 kbase_csf_firmware_get_gpu_idle_hysteresis_time(struct kbase_device *kbdev) u32 dur; kbase_csf_scheduler_spin_lock(kbdev, &flags); - dur = 
kbdev->csf.gpu_idle_hysteresis_ms; + dur = kbdev->csf.gpu_idle_hysteresis_us; kbase_csf_scheduler_spin_unlock(kbdev, flags); return dur; @@ -934,7 +998,7 @@ u32 kbase_csf_firmware_set_gpu_idle_hysteresis_time(struct kbase_device *kbdev, mutex_lock(&kbdev->fw_load_lock); if (unlikely(!kbdev->csf.firmware_inited)) { kbase_csf_scheduler_spin_lock(kbdev, &flags); - kbdev->csf.gpu_idle_hysteresis_ms = dur; + kbdev->csf.gpu_idle_hysteresis_us = dur; kbdev->csf.gpu_idle_dur_count = hysteresis_val; kbase_csf_scheduler_spin_unlock(kbdev, flags); mutex_unlock(&kbdev->fw_load_lock); @@ -965,7 +1029,7 @@ u32 kbase_csf_firmware_set_gpu_idle_hysteresis_time(struct kbase_device *kbdev, wait_for_global_request(kbdev, GLB_REQ_IDLE_DISABLE_MASK); kbase_csf_scheduler_spin_lock(kbdev, &flags); - kbdev->csf.gpu_idle_hysteresis_ms = dur; + kbdev->csf.gpu_idle_hysteresis_us = dur; kbdev->csf.gpu_idle_dur_count = hysteresis_val; kbase_csf_firmware_enable_gpu_idle_timer(kbdev); kbase_csf_scheduler_spin_unlock(kbdev, flags); @@ -1076,14 +1140,14 @@ void kbase_csf_firmware_early_term(struct kbase_device *kbdev) int kbase_csf_firmware_late_init(struct kbase_device *kbdev) { - kbdev->csf.gpu_idle_hysteresis_ms = FIRMWARE_IDLE_HYSTERESIS_TIME_MS; + kbdev->csf.gpu_idle_hysteresis_us = FIRMWARE_IDLE_HYSTERESIS_TIME_USEC; #ifdef KBASE_PM_RUNTIME if (kbase_pm_gpu_sleep_allowed(kbdev)) - kbdev->csf.gpu_idle_hysteresis_ms /= FIRMWARE_IDLE_HYSTERESIS_GPU_SLEEP_SCALER; + kbdev->csf.gpu_idle_hysteresis_us /= FIRMWARE_IDLE_HYSTERESIS_GPU_SLEEP_SCALER; #endif - WARN_ON(!kbdev->csf.gpu_idle_hysteresis_ms); + WARN_ON(!kbdev->csf.gpu_idle_hysteresis_us); kbdev->csf.gpu_idle_dur_count = - convert_dur_to_idle_count(kbdev, kbdev->csf.gpu_idle_hysteresis_ms); + convert_dur_to_idle_count(kbdev, kbdev->csf.gpu_idle_hysteresis_us); return 0; } @@ -1166,8 +1230,6 @@ void kbase_csf_firmware_unload_term(struct kbase_device *kbdev) /* NO_MALI: Don't stop firmware or unload MMU tables */ - kbase_mmu_term(kbdev, 
&kbdev->csf.mcu_mmu); - kbase_csf_scheduler_term(kbdev); kbase_csf_free_dummy_user_reg_page(kbdev); @@ -1197,6 +1259,8 @@ void kbase_csf_firmware_unload_term(struct kbase_device *kbdev) * entry parsed from the firmware image. */ kbase_mcu_shared_interface_region_tracker_term(kbdev); + + kbase_mmu_term(kbdev, &kbdev->csf.mcu_mmu); } void kbase_csf_firmware_enable_gpu_idle_timer(struct kbase_device *kbdev) @@ -1533,7 +1597,7 @@ int kbase_csf_firmware_mcu_shared_mapping_init( ret = kbase_mmu_insert_pages_no_flush(kbdev, &kbdev->csf.mcu_mmu, va_reg->start_pfn, &phys[0], num_pages, gpu_map_properties, - KBASE_MEM_GROUP_CSF_FW, NULL); + KBASE_MEM_GROUP_CSF_FW, NULL, NULL, false); if (ret) goto mmu_insert_pages_error; @@ -1594,4 +1658,3 @@ void kbase_csf_firmware_mcu_shared_mapping_term( vunmap(csf_mapping->cpu_addr); kfree(csf_mapping->phys); } - diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_heap_context_alloc.c b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_heap_context_alloc.c index 1876d505dd5b..42d19e1b6ad7 100644 --- a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_heap_context_alloc.c +++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_heap_context_alloc.c @@ -23,10 +23,7 @@ #include "mali_kbase_csf_heap_context_alloc.h" /* Size of one heap context structure, in bytes. */ -#define HEAP_CTX_SIZE ((size_t)32) - -/* Total size of the GPU memory region allocated for heap contexts, in bytes. 
*/ -#define HEAP_CTX_REGION_SIZE (MAX_TILER_HEAPS * HEAP_CTX_SIZE) +#define HEAP_CTX_SIZE ((u32)32) /** * sub_alloc - Sub-allocate a heap context from a GPU memory region @@ -38,8 +35,8 @@ static u64 sub_alloc(struct kbase_csf_heap_context_allocator *const ctx_alloc) { struct kbase_context *const kctx = ctx_alloc->kctx; - int heap_nr = 0; - size_t ctx_offset = 0; + unsigned long heap_nr = 0; + u32 ctx_offset = 0; u64 heap_gpu_va = 0; struct kbase_vmap_struct mapping; void *ctx_ptr = NULL; @@ -55,29 +52,64 @@ static u64 sub_alloc(struct kbase_csf_heap_context_allocator *const ctx_alloc) return 0; } - ctx_offset = heap_nr * HEAP_CTX_SIZE; + ctx_offset = heap_nr * ctx_alloc->heap_context_size_aligned; heap_gpu_va = ctx_alloc->gpu_va + ctx_offset; ctx_ptr = kbase_vmap_prot(kctx, heap_gpu_va, - HEAP_CTX_SIZE, KBASE_REG_CPU_WR, &mapping); + ctx_alloc->heap_context_size_aligned, KBASE_REG_CPU_WR, &mapping); if (unlikely(!ctx_ptr)) { dev_err(kctx->kbdev->dev, - "Failed to map tiler heap context %d (0x%llX)\n", + "Failed to map tiler heap context %lu (0x%llX)\n", heap_nr, heap_gpu_va); return 0; } - memset(ctx_ptr, 0, HEAP_CTX_SIZE); + memset(ctx_ptr, 0, ctx_alloc->heap_context_size_aligned); kbase_vunmap(ctx_ptr, &mapping); bitmap_set(ctx_alloc->in_use, heap_nr, 1); - dev_dbg(kctx->kbdev->dev, "Allocated tiler heap context %d (0x%llX)\n", + dev_dbg(kctx->kbdev->dev, "Allocated tiler heap context %lu (0x%llX)\n", heap_nr, heap_gpu_va); return heap_gpu_va; } +/** + * evict_heap_context - Evict the data of heap context from GPU's L2 cache. + * + * @ctx_alloc: Pointer to the heap context allocator. + * @heap_gpu_va: The GPU virtual address of a heap context structure to free. + * + * This function is called when memory for the heap context is freed. It uses the + * FLUSH_PA_RANGE command to evict the data of heap context, so on older CSF GPUs + * there is nothing done. 
The whole GPU cache is anyways expected to be flushed + * on older GPUs when initial chunks of the heap are freed just before the memory + * for heap context is freed. + */ +static void evict_heap_context(struct kbase_csf_heap_context_allocator *const ctx_alloc, + u64 const heap_gpu_va) +{ + struct kbase_context *const kctx = ctx_alloc->kctx; + u32 offset_in_bytes = (u32)(heap_gpu_va - ctx_alloc->gpu_va); + u32 offset_within_page = offset_in_bytes & ~PAGE_MASK; + u32 page_index = offset_in_bytes >> PAGE_SHIFT; + struct tagged_addr page = + kbase_get_gpu_phy_pages(ctx_alloc->region)[page_index]; + phys_addr_t heap_context_pa = as_phys_addr_t(page) + offset_within_page; + + lockdep_assert_held(&ctx_alloc->lock); + + /* There is no need to take vm_lock here as the ctx_alloc region is no_user_free + * refcounted. The region and the backing page can't disappear whilst this + * function is executing. + * Flush type is passed as FLUSH_PT to CLN+INV L2 only. + */ + kbase_mmu_flush_pa_range(kctx->kbdev, kctx, + heap_context_pa, ctx_alloc->heap_context_size_aligned, + KBASE_MMU_OP_FLUSH_PT); +} + /** * sub_free - Free a heap context sub-allocated from a GPU memory region * @@ -88,7 +120,7 @@ static void sub_free(struct kbase_csf_heap_context_allocator *const ctx_alloc, u64 const heap_gpu_va) { struct kbase_context *const kctx = ctx_alloc->kctx; - u64 ctx_offset = 0; + u32 ctx_offset = 0; unsigned int heap_nr = 0; lockdep_assert_held(&ctx_alloc->lock); @@ -99,13 +131,15 @@ static void sub_free(struct kbase_csf_heap_context_allocator *const ctx_alloc, if (WARN_ON(heap_gpu_va < ctx_alloc->gpu_va)) return; - ctx_offset = heap_gpu_va - ctx_alloc->gpu_va; + ctx_offset = (u32)(heap_gpu_va - ctx_alloc->gpu_va); - if (WARN_ON(ctx_offset >= HEAP_CTX_REGION_SIZE) || - WARN_ON(ctx_offset % HEAP_CTX_SIZE)) + if (WARN_ON(ctx_offset >= (ctx_alloc->region->nr_pages << PAGE_SHIFT)) || + WARN_ON(ctx_offset % ctx_alloc->heap_context_size_aligned)) return; - heap_nr = ctx_offset / 
HEAP_CTX_SIZE; + evict_heap_context(ctx_alloc, heap_gpu_va); + + heap_nr = ctx_offset / ctx_alloc->heap_context_size_aligned; dev_dbg(kctx->kbdev->dev, "Freed tiler heap context %d (0x%llX)\n", heap_nr, heap_gpu_va); @@ -116,12 +150,17 @@ int kbase_csf_heap_context_allocator_init( struct kbase_csf_heap_context_allocator *const ctx_alloc, struct kbase_context *const kctx) { + const u32 gpu_cache_line_size = + (1U << kctx->kbdev->gpu_props.props.l2_props.log2_line_size); + /* We cannot pre-allocate GPU memory here because the * custom VA zone may not have been created yet. */ ctx_alloc->kctx = kctx; ctx_alloc->region = NULL; ctx_alloc->gpu_va = 0; + ctx_alloc->heap_context_size_aligned = + (HEAP_CTX_SIZE + gpu_cache_line_size - 1) & ~(gpu_cache_line_size - 1); mutex_init(&ctx_alloc->lock); bitmap_zero(ctx_alloc->in_use, MAX_TILER_HEAPS); @@ -142,7 +181,14 @@ void kbase_csf_heap_context_allocator_term( if (ctx_alloc->region) { kbase_gpu_vm_lock(kctx); - ctx_alloc->region->flags &= ~KBASE_REG_NO_USER_FREE; + /* + * We can't enforce (nor check) the no_user_free refcount + * to be 0 here as other code regions can take such a reference. + * Anyway, this isn't an issue as the region will eventually + * be freed by the region tracker if its refcount didn't drop + * to 0. 
+ */ + kbase_va_region_no_user_free_put(kctx, ctx_alloc->region); kbase_mem_free_region(kctx, ctx_alloc->region); kbase_gpu_vm_unlock(kctx); } @@ -156,7 +202,7 @@ u64 kbase_csf_heap_context_allocator_alloc( struct kbase_context *const kctx = ctx_alloc->kctx; u64 flags = BASE_MEM_PROT_GPU_RD | BASE_MEM_PROT_GPU_WR | BASE_MEM_PROT_CPU_WR | BASEP_MEM_NO_USER_FREE | BASE_MEM_PROT_CPU_RD; - u64 nr_pages = PFN_UP(HEAP_CTX_REGION_SIZE); + u64 nr_pages = PFN_UP(MAX_TILER_HEAPS * ctx_alloc->heap_context_size_aligned); u64 heap_gpu_va = 0; /* Calls to this function are inherently asynchronous, with respect to diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_kcpu.c b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_kcpu.c index 0b3f1334a9e6..f1727224b243 100644 --- a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_kcpu.c +++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_kcpu.c @@ -80,7 +80,14 @@ static int kbase_kcpu_map_import_prepare( * on the physical pages tracking object. When the last * reference to the tracking object is dropped the pages * would be unpinned if they weren't unpinned before. + * + * Region should be CPU cached: abort if it isn't. 
*/ + if (WARN_ON(!(reg->flags & KBASE_REG_CPU_CACHED))) { + ret = -EINVAL; + goto out; + } + ret = kbase_jd_user_buf_pin_pages(kctx, reg); if (ret) goto out; @@ -674,9 +681,8 @@ static int kbase_csf_queue_group_suspend_prepare( (kbase_reg_current_backed_size(reg) < nr_pages) || !(reg->flags & KBASE_REG_CPU_WR) || (reg->gpu_alloc->type != KBASE_MEM_TYPE_NATIVE) || - (reg->flags & KBASE_REG_DONT_NEED) || - (reg->flags & KBASE_REG_ACTIVE_JIT_ALLOC) || - (reg->flags & KBASE_REG_NO_USER_FREE)) { + (kbase_is_region_shrinkable(reg)) || + (kbase_va_region_is_no_user_free(kctx, reg))) { ret = -EINVAL; goto out_clean_pages; } @@ -784,13 +790,14 @@ static int kbase_kcpu_cqs_wait_process(struct kbase_device *kbdev, return -EINVAL; } - sig_set = evt[BASEP_EVENT_VAL_INDEX] > cqs_wait->objs[i].val; + sig_set = + evt[BASEP_EVENT32_VAL_OFFSET / sizeof(u32)] > cqs_wait->objs[i].val; if (sig_set) { bool error = false; bitmap_set(cqs_wait->signaled, i, 1); if ((cqs_wait->inherit_err_flags & (1U << i)) && - evt[BASEP_EVENT_ERR_INDEX] > 0) { + evt[BASEP_EVENT32_ERR_OFFSET / sizeof(u32)] > 0) { queue->has_error = true; error = true; } @@ -800,7 +807,7 @@ static int kbase_kcpu_cqs_wait_process(struct kbase_device *kbdev, error); KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_WAIT_END( - kbdev, queue, evt[BASEP_EVENT_ERR_INDEX]); + kbdev, queue, evt[BASEP_EVENT32_ERR_OFFSET / sizeof(u32)]); queue->command_started = false; } @@ -817,12 +824,34 @@ static int kbase_kcpu_cqs_wait_process(struct kbase_device *kbdev, return bitmap_full(cqs_wait->signaled, cqs_wait->nr_objs); } +static inline bool kbase_kcpu_cqs_is_data_type_valid(u8 data_type) +{ + return data_type == BASEP_CQS_DATA_TYPE_U32 || data_type == BASEP_CQS_DATA_TYPE_U64; +} + +static inline bool kbase_kcpu_cqs_is_aligned(u64 addr, u8 data_type) +{ + BUILD_BUG_ON(BASEP_EVENT32_ALIGN_BYTES != BASEP_EVENT32_SIZE_BYTES); + BUILD_BUG_ON(BASEP_EVENT64_ALIGN_BYTES != BASEP_EVENT64_SIZE_BYTES); + 
WARN_ON(!kbase_kcpu_cqs_is_data_type_valid(data_type)); + + switch (data_type) { + default: + return false; + case BASEP_CQS_DATA_TYPE_U32: + return (addr & (BASEP_EVENT32_ALIGN_BYTES - 1)) == 0; + case BASEP_CQS_DATA_TYPE_U64: + return (addr & (BASEP_EVENT64_ALIGN_BYTES - 1)) == 0; + } +} + static int kbase_kcpu_cqs_wait_prepare(struct kbase_kcpu_command_queue *queue, struct base_kcpu_command_cqs_wait_info *cqs_wait_info, struct kbase_kcpu_command *current_command) { struct base_cqs_wait_info *objs; unsigned int nr_objs = cqs_wait_info->nr_objs; + unsigned int i; lockdep_assert_held(&queue->lock); @@ -842,6 +871,17 @@ static int kbase_kcpu_cqs_wait_prepare(struct kbase_kcpu_command_queue *queue, return -ENOMEM; } + /* Check the CQS objects as early as possible. By checking their alignment + * (required alignment equals to size for Sync32 and Sync64 objects), we can + * prevent overrunning the supplied event page. + */ + for (i = 0; i < nr_objs; i++) { + if (!kbase_kcpu_cqs_is_aligned(objs[i].addr, BASEP_CQS_DATA_TYPE_U32)) { + kfree(objs); + return -EINVAL; + } + } + if (++queue->cqs_wait_count == 1) { if (kbase_csf_event_wait_add(queue->kctx, event_cqs_callback, queue)) { @@ -897,14 +937,13 @@ static void kbase_kcpu_cqs_set_process(struct kbase_device *kbdev, "Sync memory %llx already freed", cqs_set->objs[i].addr); queue->has_error = true; } else { - evt[BASEP_EVENT_ERR_INDEX] = queue->has_error; + evt[BASEP_EVENT32_ERR_OFFSET / sizeof(u32)] = queue->has_error; /* Set to signaled */ - evt[BASEP_EVENT_VAL_INDEX]++; + evt[BASEP_EVENT32_VAL_OFFSET / sizeof(u32)]++; kbase_phy_alloc_mapping_put(queue->kctx, mapping); - KBASE_KTRACE_ADD_CSF_KCPU(kbdev, KCPU_CQS_SET, - queue, cqs_set->objs[i].addr, - evt[BASEP_EVENT_ERR_INDEX]); + KBASE_KTRACE_ADD_CSF_KCPU(kbdev, KCPU_CQS_SET, queue, cqs_set->objs[i].addr, + evt[BASEP_EVENT32_ERR_OFFSET / sizeof(u32)]); } } @@ -921,6 +960,7 @@ static int kbase_kcpu_cqs_set_prepare( { struct base_cqs_set *objs; unsigned int nr_objs = 
cqs_set_info->nr_objs; + unsigned int i; lockdep_assert_held(&kcpu_queue->lock); @@ -940,6 +980,17 @@ static int kbase_kcpu_cqs_set_prepare( return -ENOMEM; } + /* Check the CQS objects as early as possible. By checking their alignment + * (required alignment equals to size for Sync32 and Sync64 objects), we can + * prevent overrunning the supplied event page. + */ + for (i = 0; i < nr_objs; i++) { + if (!kbase_kcpu_cqs_is_aligned(objs[i].addr, BASEP_CQS_DATA_TYPE_U32)) { + kfree(objs); + return -EINVAL; + } + } + current_command->type = BASE_KCPU_COMMAND_TYPE_CQS_SET; current_command->info.cqs_set.nr_objs = nr_objs; current_command->info.cqs_set.objs = objs; @@ -982,8 +1033,9 @@ static int kbase_kcpu_cqs_wait_operation_process(struct kbase_device *kbdev, if (!test_bit(i, cqs_wait_operation->signaled)) { struct kbase_vmap_struct *mapping; bool sig_set; - u64 *evt = (u64 *)kbase_phy_alloc_mapping_get(queue->kctx, - cqs_wait_operation->objs[i].addr, &mapping); + uintptr_t evt = (uintptr_t)kbase_phy_alloc_mapping_get( + queue->kctx, cqs_wait_operation->objs[i].addr, &mapping); + u64 val = 0; /* GPUCORE-28172 RDT to review */ if (!queue->command_started) @@ -996,12 +1048,29 @@ static int kbase_kcpu_cqs_wait_operation_process(struct kbase_device *kbdev, return -EINVAL; } + switch (cqs_wait_operation->objs[i].data_type) { + default: + WARN_ON(!kbase_kcpu_cqs_is_data_type_valid( + cqs_wait_operation->objs[i].data_type)); + kbase_phy_alloc_mapping_put(queue->kctx, mapping); + queue->has_error = true; + return -EINVAL; + case BASEP_CQS_DATA_TYPE_U32: + val = *(u32 *)evt; + evt += BASEP_EVENT32_ERR_OFFSET - BASEP_EVENT32_VAL_OFFSET; + break; + case BASEP_CQS_DATA_TYPE_U64: + val = *(u64 *)evt; + evt += BASEP_EVENT64_ERR_OFFSET - BASEP_EVENT64_VAL_OFFSET; + break; + } + switch (cqs_wait_operation->objs[i].operation) { case BASEP_CQS_WAIT_OPERATION_LE: - sig_set = *evt <= cqs_wait_operation->objs[i].val; + sig_set = val <= cqs_wait_operation->objs[i].val; break; case 
BASEP_CQS_WAIT_OPERATION_GT: - sig_set = *evt > cqs_wait_operation->objs[i].val; + sig_set = val > cqs_wait_operation->objs[i].val; break; default: dev_dbg(kbdev->dev, @@ -1013,24 +1082,10 @@ static int kbase_kcpu_cqs_wait_operation_process(struct kbase_device *kbdev, return -EINVAL; } - /* Increment evt up to the error_state value depending on the CQS data type */ - switch (cqs_wait_operation->objs[i].data_type) { - default: - dev_dbg(kbdev->dev, "Unreachable data_type=%d", cqs_wait_operation->objs[i].data_type); - /* Fallthrough - hint to compiler that there's really only 2 options at present */ - fallthrough; - case BASEP_CQS_DATA_TYPE_U32: - evt = (u64 *)((u8 *)evt + sizeof(u32)); - break; - case BASEP_CQS_DATA_TYPE_U64: - evt = (u64 *)((u8 *)evt + sizeof(u64)); - break; - } - if (sig_set) { bitmap_set(cqs_wait_operation->signaled, i, 1); if ((cqs_wait_operation->inherit_err_flags & (1U << i)) && - *evt > 0) { + *(u32 *)evt > 0) { queue->has_error = true; } @@ -1058,6 +1113,7 @@ static int kbase_kcpu_cqs_wait_operation_prepare(struct kbase_kcpu_command_queue { struct base_cqs_wait_operation_info *objs; unsigned int nr_objs = cqs_wait_operation_info->nr_objs; + unsigned int i; lockdep_assert_held(&queue->lock); @@ -1077,6 +1133,18 @@ static int kbase_kcpu_cqs_wait_operation_prepare(struct kbase_kcpu_command_queue return -ENOMEM; } + /* Check the CQS objects as early as possible. By checking their alignment + * (required alignment equals to size for Sync32 and Sync64 objects), we can + * prevent overrunning the supplied event page. 
+ */ + for (i = 0; i < nr_objs; i++) { + if (!kbase_kcpu_cqs_is_data_type_valid(objs[i].data_type) || + !kbase_kcpu_cqs_is_aligned(objs[i].addr, objs[i].data_type)) { + kfree(objs); + return -EINVAL; + } + } + if (++queue->cqs_wait_count == 1) { if (kbase_csf_event_wait_add(queue->kctx, event_cqs_callback, queue)) { @@ -1107,6 +1175,44 @@ static int kbase_kcpu_cqs_wait_operation_prepare(struct kbase_kcpu_command_queue return 0; } +static void kbasep_kcpu_cqs_do_set_operation_32(struct kbase_kcpu_command_queue *queue, + uintptr_t evt, u8 operation, u64 val) +{ + struct kbase_device *kbdev = queue->kctx->kbdev; + + switch (operation) { + case BASEP_CQS_SET_OPERATION_ADD: + *(u32 *)evt += (u32)val; + break; + case BASEP_CQS_SET_OPERATION_SET: + *(u32 *)evt = val; + break; + default: + dev_dbg(kbdev->dev, "Unsupported CQS set operation %d", operation); + queue->has_error = true; + break; + } +} + +static void kbasep_kcpu_cqs_do_set_operation_64(struct kbase_kcpu_command_queue *queue, + uintptr_t evt, u8 operation, u64 val) +{ + struct kbase_device *kbdev = queue->kctx->kbdev; + + switch (operation) { + case BASEP_CQS_SET_OPERATION_ADD: + *(u64 *)evt += val; + break; + case BASEP_CQS_SET_OPERATION_SET: + *(u64 *)evt = val; + break; + default: + dev_dbg(kbdev->dev, "Unsupported CQS set operation %d", operation); + queue->has_error = true; + break; + } +} + static void kbase_kcpu_cqs_set_operation_process( struct kbase_device *kbdev, struct kbase_kcpu_command_queue *queue, @@ -1121,9 +1227,9 @@ static void kbase_kcpu_cqs_set_operation_process( for (i = 0; i < cqs_set_operation->nr_objs; i++) { struct kbase_vmap_struct *mapping; - u64 *evt; + uintptr_t evt; - evt = (u64 *)kbase_phy_alloc_mapping_get( + evt = (uintptr_t)kbase_phy_alloc_mapping_get( queue->kctx, cqs_set_operation->objs[i].addr, &mapping); /* GPUCORE-28172 RDT to review */ @@ -1133,39 +1239,31 @@ static void kbase_kcpu_cqs_set_operation_process( "Sync memory %llx already freed", 
cqs_set_operation->objs[i].addr); queue->has_error = true; } else { - switch (cqs_set_operation->objs[i].operation) { - case BASEP_CQS_SET_OPERATION_ADD: - *evt += cqs_set_operation->objs[i].val; - break; - case BASEP_CQS_SET_OPERATION_SET: - *evt = cqs_set_operation->objs[i].val; - break; - default: - dev_dbg(kbdev->dev, - "Unsupported CQS set operation %d", cqs_set_operation->objs[i].operation); - queue->has_error = true; - break; - } + struct base_cqs_set_operation_info *obj = &cqs_set_operation->objs[i]; - /* Increment evt up to the error_state value depending on the CQS data type */ - switch (cqs_set_operation->objs[i].data_type) { + switch (obj->data_type) { default: - dev_dbg(kbdev->dev, "Unreachable data_type=%d", cqs_set_operation->objs[i].data_type); - /* Fallthrough - hint to compiler that there's really only 2 options at present */ - fallthrough; + WARN_ON(!kbase_kcpu_cqs_is_data_type_valid(obj->data_type)); + queue->has_error = true; + goto skip_err_propagation; case BASEP_CQS_DATA_TYPE_U32: - evt = (u64 *)((u8 *)evt + sizeof(u32)); + kbasep_kcpu_cqs_do_set_operation_32(queue, evt, obj->operation, + obj->val); + evt += BASEP_EVENT32_ERR_OFFSET - BASEP_EVENT32_VAL_OFFSET; break; case BASEP_CQS_DATA_TYPE_U64: - evt = (u64 *)((u8 *)evt + sizeof(u64)); + kbasep_kcpu_cqs_do_set_operation_64(queue, evt, obj->operation, + obj->val); + evt += BASEP_EVENT64_ERR_OFFSET - BASEP_EVENT64_VAL_OFFSET; break; } /* GPUCORE-28172 RDT to review */ /* Always propagate errors */ - *evt = queue->has_error; + *(u32 *)evt = queue->has_error; +skip_err_propagation: kbase_phy_alloc_mapping_put(queue->kctx, mapping); } } @@ -1183,6 +1281,7 @@ static int kbase_kcpu_cqs_set_operation_prepare( { struct base_cqs_set_operation_info *objs; unsigned int nr_objs = cqs_set_operation_info->nr_objs; + unsigned int i; lockdep_assert_held(&kcpu_queue->lock); @@ -1202,6 +1301,18 @@ static int kbase_kcpu_cqs_set_operation_prepare( return -ENOMEM; } + /* Check the CQS objects as early as 
possible. By checking their alignment + * (required alignment equals to size for Sync32 and Sync64 objects), we can + * prevent overrunning the supplied event page. + */ + for (i = 0; i < nr_objs; i++) { + if (!kbase_kcpu_cqs_is_data_type_valid(objs[i].data_type) || + !kbase_kcpu_cqs_is_aligned(objs[i].addr, objs[i].data_type)) { + kfree(objs); + return -EINVAL; + } + } + current_command->type = BASE_KCPU_COMMAND_TYPE_CQS_SET_OPERATION; current_command->info.cqs_set_operation.nr_objs = nr_objs; current_command->info.cqs_set_operation.objs = objs; @@ -1234,9 +1345,8 @@ static void kbase_csf_fence_wait_callback(struct dma_fence *fence, queue_work(kcpu_queue->wq, &kcpu_queue->work); } -static void kbase_kcpu_fence_wait_cancel( - struct kbase_kcpu_command_queue *kcpu_queue, - struct kbase_kcpu_command_fence_info *fence_info) +static void kbasep_kcpu_fence_wait_cancel(struct kbase_kcpu_command_queue *kcpu_queue, + struct kbase_kcpu_command_fence_info *fence_info) { struct kbase_context *const kctx = kcpu_queue->kctx; @@ -1410,15 +1520,14 @@ static int kbase_kcpu_fence_wait_process( */ if (fence_status) - kbase_kcpu_fence_wait_cancel(kcpu_queue, fence_info); + kbasep_kcpu_fence_wait_cancel(kcpu_queue, fence_info); return fence_status; } -static int kbase_kcpu_fence_wait_prepare( - struct kbase_kcpu_command_queue *kcpu_queue, - struct base_kcpu_command_fence_info *fence_info, - struct kbase_kcpu_command *current_command) +static int kbase_kcpu_fence_wait_prepare(struct kbase_kcpu_command_queue *kcpu_queue, + struct base_kcpu_command_fence_info *fence_info, + struct kbase_kcpu_command *current_command) { #if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) struct fence *fence_in; @@ -1429,8 +1538,7 @@ static int kbase_kcpu_fence_wait_prepare( lockdep_assert_held(&kcpu_queue->lock); - if (copy_from_user(&fence, u64_to_user_ptr(fence_info->fence), - sizeof(fence))) + if (copy_from_user(&fence, u64_to_user_ptr(fence_info->fence), sizeof(fence))) return -ENOMEM; fence_in = 
sync_file_get_fence(fence.basep.fd); @@ -1444,9 +1552,8 @@ static int kbase_kcpu_fence_wait_prepare( return 0; } -static int kbase_kcpu_fence_signal_process( - struct kbase_kcpu_command_queue *kcpu_queue, - struct kbase_kcpu_command_fence_info *fence_info) +static int kbasep_kcpu_fence_signal_process(struct kbase_kcpu_command_queue *kcpu_queue, + struct kbase_kcpu_command_fence_info *fence_info) { struct kbase_context *const kctx = kcpu_queue->kctx; int ret; @@ -1467,37 +1574,37 @@ static int kbase_kcpu_fence_signal_process( fence_info->fence->seqno); /* dma_fence refcount needs to be decreased to release it. */ - dma_fence_put(fence_info->fence); + kbase_fence_put(fence_info->fence); fence_info->fence = NULL; return ret; } -static int kbase_kcpu_fence_signal_prepare( - struct kbase_kcpu_command_queue *kcpu_queue, - struct base_kcpu_command_fence_info *fence_info, - struct kbase_kcpu_command *current_command) +static int kbasep_kcpu_fence_signal_init(struct kbase_kcpu_command_queue *kcpu_queue, + struct kbase_kcpu_command *current_command, + struct base_fence *fence, struct sync_file **sync_file, + int *fd) { #if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) struct fence *fence_out; #else struct dma_fence *fence_out; #endif - struct base_fence fence; - struct sync_file *sync_file; + struct kbase_kcpu_dma_fence *kcpu_fence; int ret = 0; - int fd; lockdep_assert_held(&kcpu_queue->lock); - if (copy_from_user(&fence, u64_to_user_ptr(fence_info->fence), - sizeof(fence))) - return -EFAULT; - - fence_out = kzalloc(sizeof(*fence_out), GFP_KERNEL); - if (!fence_out) + kcpu_fence = kzalloc(sizeof(*kcpu_fence), GFP_KERNEL); + if (!kcpu_fence) return -ENOMEM; +#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) + fence_out = (struct fence *)kcpu_fence; +#else + fence_out = (struct dma_fence *)kcpu_fence; +#endif + dma_fence_init(fence_out, &kbase_fence_ops, &kbase_csf_fence_lock, @@ -1513,28 +1620,70 @@ static int kbase_kcpu_fence_signal_prepare( dma_fence_get(fence_out); 
#endif + /* Set reference to KCPU metadata and increment refcount */ + kcpu_fence->metadata = kcpu_queue->metadata; +#if (KERNEL_VERSION(4, 11, 0) > LINUX_VERSION_CODE) + WARN_ON(!atomic_inc_not_zero(&kcpu_fence->metadata->refcount)); +#else + WARN_ON(!refcount_inc_not_zero(&kcpu_fence->metadata->refcount)); +#endif + /* create a sync_file fd representing the fence */ - sync_file = sync_file_create(fence_out); - if (!sync_file) { + *sync_file = sync_file_create(fence_out); + if (!(*sync_file)) { ret = -ENOMEM; goto file_create_fail; } - fd = get_unused_fd_flags(O_CLOEXEC); - if (fd < 0) { - ret = fd; + *fd = get_unused_fd_flags(O_CLOEXEC); + if (*fd < 0) { + ret = *fd; goto fd_flags_fail; } - fence.basep.fd = fd; + fence->basep.fd = *fd; current_command->type = BASE_KCPU_COMMAND_TYPE_FENCE_SIGNAL; current_command->info.fence.fence = fence_out; + return 0; + +fd_flags_fail: + fput((*sync_file)->file); +file_create_fail: + /* + * Upon failure, dma_fence refcount that was increased by + * dma_fence_get() or sync_file_create() needs to be decreased + * to release it. 
+ */ + kbase_fence_put(fence_out); + current_command->info.fence.fence = NULL; + + return ret; +} + +static int kbase_kcpu_fence_signal_prepare(struct kbase_kcpu_command_queue *kcpu_queue, + struct base_kcpu_command_fence_info *fence_info, + struct kbase_kcpu_command *current_command) +{ + struct base_fence fence; + struct sync_file *sync_file = NULL; + int fd; + int ret = 0; + + lockdep_assert_held(&kcpu_queue->lock); + + if (copy_from_user(&fence, u64_to_user_ptr(fence_info->fence), sizeof(fence))) + return -EFAULT; + + ret = kbasep_kcpu_fence_signal_init(kcpu_queue, current_command, &fence, &sync_file, &fd); + if (ret) + return ret; + if (copy_to_user(u64_to_user_ptr(fence_info->fence), &fence, sizeof(fence))) { ret = -EFAULT; - goto fd_flags_fail; + goto fail; } /* 'sync_file' pointer can't be safely dereferenced once 'fd' is @@ -1544,21 +1693,36 @@ static int kbase_kcpu_fence_signal_prepare( fd_install(fd, sync_file->file); return 0; -fd_flags_fail: +fail: + /* The fd was reserved by get_unused_fd_flags() but never installed; release it. */ + put_unused_fd(fd); fput(sync_file->file); -file_create_fail: - /* - * Upon failure, dma_fence refcount that was increased by - * dma_fence_get() or sync_file_create() needs to be decreased - * to release it.
- */ - dma_fence_put(fence_out); - + kbase_fence_put(current_command->info.fence.fence); current_command->info.fence.fence = NULL; - kfree(fence_out); return ret; } + +int kbase_kcpu_fence_signal_process(struct kbase_kcpu_command_queue *kcpu_queue, + struct kbase_kcpu_command_fence_info *fence_info) +{ + if (!kcpu_queue || !fence_info) + return -EINVAL; + + return kbasep_kcpu_fence_signal_process(kcpu_queue, fence_info); +} +KBASE_EXPORT_TEST_API(kbase_kcpu_fence_signal_process); + +int kbase_kcpu_fence_signal_init(struct kbase_kcpu_command_queue *kcpu_queue, + struct kbase_kcpu_command *current_command, + struct base_fence *fence, struct sync_file **sync_file, int *fd) +{ + if (!kcpu_queue || !current_command || !fence || !sync_file || !fd) + return -EINVAL; + + return kbasep_kcpu_fence_signal_init(kcpu_queue, current_command, fence, sync_file, fd); +} +KBASE_EXPORT_TEST_API(kbase_kcpu_fence_signal_init); #endif /* CONFIG_SYNC_FILE */ static void kcpu_queue_process_worker(struct work_struct *data) @@ -1595,6 +1757,9 @@ static int delete_queue(struct kbase_context *kctx, u32 id) mutex_lock(&queue->lock); + /* Metadata struct may outlive KCPU queue. */ + kbase_kcpu_dma_fence_meta_put(queue->metadata); + /* Drain the remaining work for this queue first and go past * all the waits. 
*/ @@ -1701,8 +1866,7 @@ static void kcpu_queue_process(struct kbase_kcpu_command_queue *queue, status = 0; #if IS_ENABLED(CONFIG_SYNC_FILE) if (drain_queue) { - kbase_kcpu_fence_wait_cancel(queue, - &cmd->info.fence); + kbasep_kcpu_fence_wait_cancel(queue, &cmd->info.fence); } else { status = kbase_kcpu_fence_wait_process(queue, &cmd->info.fence); @@ -1732,8 +1896,7 @@ static void kcpu_queue_process(struct kbase_kcpu_command_queue *queue, status = 0; #if IS_ENABLED(CONFIG_SYNC_FILE) - status = kbase_kcpu_fence_signal_process( - queue, &cmd->info.fence); + status = kbasep_kcpu_fence_signal_process(queue, &cmd->info.fence); if (status < 0) queue->has_error = true; @@ -2103,14 +2266,30 @@ int kbase_csf_kcpu_queue_enqueue(struct kbase_context *kctx, return -EINVAL; } + /* There might be a race between one thread trying to enqueue commands to the queue + * and another thread trying to delete the same queue. + * This race could lead to a use-after-free in the enqueuing thread if + * resources for the queue have already been freed by the deleting thread. + * + * To prevent the issue, two mutexes are acquired/released asymmetrically as follows. + * + * Lock A (kctx mutex) + * Lock B (queue mutex) + * Unlock A + * Unlock B + * + * With the kctx mutex held, the enqueuing thread checks the queue + * and returns an error code if the queue has already been deleted.
+ */ mutex_lock(&kctx->csf.kcpu_queues.lock); queue = kctx->csf.kcpu_queues.array[enq->id]; - mutex_unlock(&kctx->csf.kcpu_queues.lock); - - if (queue == NULL) + if (queue == NULL) { + dev_dbg(kctx->kbdev->dev, "Invalid KCPU queue (id:%u)", enq->id); + mutex_unlock(&kctx->csf.kcpu_queues.lock); return -EINVAL; - + } mutex_lock(&queue->lock); + mutex_unlock(&kctx->csf.kcpu_queues.lock); if (kcpu_queue_get_space(queue) < enq->nr_commands) { ret = -EBUSY; @@ -2275,6 +2454,7 @@ void kbase_csf_kcpu_queue_context_term(struct kbase_context *kctx) mutex_destroy(&kctx->csf.kcpu_queues.lock); } +KBASE_EXPORT_TEST_API(kbase_csf_kcpu_queue_context_term); int kbase_csf_kcpu_queue_delete(struct kbase_context *kctx, struct kbase_ioctl_kcpu_queue_delete *del) @@ -2288,7 +2468,9 @@ int kbase_csf_kcpu_queue_new(struct kbase_context *kctx, struct kbase_kcpu_command_queue *queue; int idx; int ret = 0; - +#if IS_ENABLED(CONFIG_SYNC_FILE) + struct kbase_kcpu_dma_fence_meta *metadata; +#endif /* The queue id is of u8 type and we use the index of the kcpu_queues * array as an id, so the number of elements in the array can't be * more than 256. 
@@ -2334,7 +2516,27 @@ int kbase_csf_kcpu_queue_new(struct kbase_context *kctx, queue->fence_context = dma_fence_context_alloc(1); queue->fence_seqno = 0; queue->fence_wait_processed = false; + + metadata = kzalloc(sizeof(*metadata), GFP_KERNEL); + if (!metadata) { + kfree(queue); + ret = -ENOMEM; + goto out; + } + + metadata->kbdev = kctx->kbdev; + metadata->kctx_id = kctx->id; + snprintf(metadata->timeline_name, MAX_TIMELINE_NAME, "%d-%d_%d-%llu-kcpu", kctx->kbdev->id, + kctx->tgid, kctx->id, queue->fence_context); + +#if (KERNEL_VERSION(4, 11, 0) > LINUX_VERSION_CODE) + atomic_set(&metadata->refcount, 1); +#else + refcount_set(&metadata->refcount, 1); #endif + queue->metadata = metadata; + atomic_inc(&kctx->kbdev->live_fence_metadata); +#endif /* CONFIG_SYNC_FILE */ queue->enqueue_failed = false; queue->command_started = false; INIT_LIST_HEAD(&queue->jit_blocked); @@ -2360,3 +2562,4 @@ out: return ret; } +KBASE_EXPORT_TEST_API(kbase_csf_kcpu_queue_new); diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_kcpu.h b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_kcpu.h index 5f9b8e0684bc..85db53867c06 100644 --- a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_kcpu.h +++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_kcpu.h @@ -22,6 +22,9 @@ #ifndef _KBASE_CSF_KCPU_H_ #define _KBASE_CSF_KCPU_H_ +#include +#include + #if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) #include #else @@ -44,8 +47,8 @@ struct kbase_kcpu_command_import_info { }; /** - * struct kbase_kcpu_command_fence_info - Structure which holds information - * about the fence object enqueued in the kcpu command queue + * struct kbase_kcpu_command_fence_info - Structure which holds information about the + * fence object enqueued in the kcpu command queue * * @fence_cb: Fence callback * @fence: Fence @@ -274,6 +277,8 @@ struct kbase_kcpu_command { * @jit_blocked: Used to keep track of command queues blocked * by a pending JIT allocation command. * @fence_timeout: Timer used to detect the fence wait timeout.
+ * @metadata: Metadata structure containing basic information about + * this queue for any fence objects associated with this queue. */ struct kbase_kcpu_command_queue { struct mutex lock; @@ -295,6 +300,9 @@ struct kbase_kcpu_command_queue { #ifdef CONFIG_MALI_BIFROST_FENCE_DEBUG struct timer_list fence_timeout; #endif /* CONFIG_MALI_BIFROST_FENCE_DEBUG */ +#if IS_ENABLED(CONFIG_SYNC_FILE) + struct kbase_kcpu_dma_fence_meta *metadata; +#endif /* CONFIG_SYNC_FILE */ }; /** @@ -359,4 +367,14 @@ int kbase_csf_kcpu_queue_context_init(struct kbase_context *kctx); */ void kbase_csf_kcpu_queue_context_term(struct kbase_context *kctx); +#if IS_ENABLED(CONFIG_SYNC_FILE) +/* Test wrappers for dma fence operations. */ +int kbase_kcpu_fence_signal_process(struct kbase_kcpu_command_queue *kcpu_queue, + struct kbase_kcpu_command_fence_info *fence_info); + +int kbase_kcpu_fence_signal_init(struct kbase_kcpu_command_queue *kcpu_queue, + struct kbase_kcpu_command *current_command, + struct base_fence *fence, struct sync_file **sync_file, int *fd); +#endif /* CONFIG_SYNC_FILE */ + #endif /* _KBASE_CSF_KCPU_H_ */ diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_mcu_shared_reg.c b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_mcu_shared_reg.c new file mode 100644 index 000000000000..77e19dba4262 --- /dev/null +++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_mcu_shared_reg.c @@ -0,0 +1,815 @@ +// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note +/* + * + * (C) COPYRIGHT 2022-2023 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + */ + +#include +#include +#include "mali_kbase_csf.h" +#include "mali_kbase_csf_mcu_shared_reg.h" +#include + +/* Scaling factor in pre-allocating shared regions for suspend bufs and userios */ +#define MCU_SHARED_REGS_PREALLOCATE_SCALE (8) + +/* MCU shared region map attempt limit */ +#define MCU_SHARED_REGS_BIND_ATTEMPT_LIMIT (4) + +/* Convert a VPFN to its start addr */ +#define GET_VPFN_VA(vpfn) ((vpfn) << PAGE_SHIFT) + +/* Macros for extracting the corresponding VPFNs from a CSG_REG (arguments are parenthesised) */ +#define CSG_REG_SUSP_BUF_VPFN(reg, nr_susp_pages) ((reg)->start_pfn) +#define CSG_REG_PMOD_BUF_VPFN(reg, nr_susp_pages) ((reg)->start_pfn + (nr_susp_pages)) +#define CSG_REG_USERIO_VPFN(reg, csi, nr_susp_pages) ((reg)->start_pfn + 2 * ((nr_susp_pages) + (csi))) + +/* MCU shared segment dummy page mapping flags */ +#define DUMMY_PAGE_MAP_FLAGS (KBASE_REG_MEMATTR_INDEX(AS_MEMATTR_INDEX_DEFAULT) | KBASE_REG_GPU_NX) + +/* MCU shared segment suspend buffer mapping flags */ +#define SUSP_PAGE_MAP_FLAGS \ + (KBASE_REG_GPU_RD | KBASE_REG_GPU_WR | KBASE_REG_GPU_NX | \ + KBASE_REG_MEMATTR_INDEX(AS_MEMATTR_INDEX_DEFAULT)) + +/** + * struct kbase_csg_shared_region - Wrapper object for use with a CSG on runtime + * resources for suspend buffer pages, userio pages + * and their corresponding mapping GPU VA addresses + * from the MCU shared interface segment + * + * @link: Link to the managing list for the wrapper object. + * @reg: pointer to the region allocated from the shared interface segment, which + * covers the normal/P-mode suspend buffers, userio pages of the queues + * @grp: Pointer to the bound kbase_queue_group, or NULL if no binding (free).
+ * @pmode_mapped: Boolean for indicating the region has MMU mapped with the bound group's + * protected mode suspend buffer pages. + */ +struct kbase_csg_shared_region { + struct list_head link; + struct kbase_va_region *reg; + struct kbase_queue_group *grp; + bool pmode_mapped; +}; + +static unsigned long get_userio_mmu_flags(struct kbase_device *kbdev) +{ + unsigned long userio_map_flags; + + if (kbdev->system_coherency == COHERENCY_NONE) + userio_map_flags = + KBASE_REG_GPU_RD | KBASE_REG_MEMATTR_INDEX(AS_MEMATTR_INDEX_NON_CACHEABLE); + else + userio_map_flags = KBASE_REG_GPU_RD | KBASE_REG_SHARE_BOTH | + KBASE_REG_MEMATTR_INDEX(AS_MEMATTR_INDEX_SHARED); + + return (userio_map_flags | KBASE_REG_GPU_NX); +} + +static void set_page_meta_status_not_movable(struct tagged_addr phy) +{ + if (kbase_page_migration_enabled) { + struct kbase_page_metadata *page_md = kbase_page_private(as_page(phy)); + + if (page_md) { + spin_lock(&page_md->migrate_lock); + page_md->status = PAGE_STATUS_SET(page_md->status, (u8)NOT_MOVABLE); + spin_unlock(&page_md->migrate_lock); + } + } +} + +static struct kbase_csg_shared_region *get_group_bound_csg_reg(struct kbase_queue_group *group) +{ + return (struct kbase_csg_shared_region *)group->csg_reg; +} + +static inline int update_mapping_with_dummy_pages(struct kbase_device *kbdev, u64 vpfn, + u32 nr_pages) +{ + struct kbase_csf_mcu_shared_regions *shared_regs = &kbdev->csf.scheduler.mcu_regs_data; + const unsigned long mem_flags = DUMMY_PAGE_MAP_FLAGS; + + return kbase_mmu_update_csf_mcu_pages(kbdev, vpfn, shared_regs->dummy_phys, nr_pages, + mem_flags, KBASE_MEM_GROUP_CSF_FW); +} + +static inline int insert_dummy_pages(struct kbase_device *kbdev, u64 vpfn, u32 nr_pages) +{ + struct kbase_csf_mcu_shared_regions *shared_regs = &kbdev->csf.scheduler.mcu_regs_data; + const unsigned long mem_flags = DUMMY_PAGE_MAP_FLAGS; + const enum kbase_caller_mmu_sync_info mmu_sync_info = CALLER_MMU_ASYNC; + + return kbase_mmu_insert_pages(kbdev, 
&kbdev->csf.mcu_mmu, vpfn, shared_regs->dummy_phys, + nr_pages, mem_flags, MCU_AS_NR, KBASE_MEM_GROUP_CSF_FW, + mmu_sync_info, NULL, false); +} + +/* Reset consecutive retry count to zero */ +static void notify_group_csg_reg_map_done(struct kbase_queue_group *group) +{ + lockdep_assert_held(&group->kctx->kbdev->csf.scheduler.lock); + + /* Just clear the internal map retry count */ + group->csg_reg_bind_retries = 0; +} + +/* Return true if a fatal group error has already been triggered */ +static bool notify_group_csg_reg_map_error(struct kbase_queue_group *group) +{ + struct kbase_device *kbdev = group->kctx->kbdev; + + lockdep_assert_held(&kbdev->csf.scheduler.lock); + + if (group->csg_reg_bind_retries < U8_MAX) + group->csg_reg_bind_retries++; + + /* Allow only one fatal error notification */ + if (group->csg_reg_bind_retries == MCU_SHARED_REGS_BIND_ATTEMPT_LIMIT) { + struct base_gpu_queue_group_error const err_payload = { + .error_type = BASE_GPU_QUEUE_GROUP_ERROR_FATAL, + .payload = { .fatal_group = { .status = GPU_EXCEPTION_TYPE_SW_FAULT_0 } } + }; + + dev_err(kbdev->dev, "Fatal: group_%d_%d_%d exceeded shared region map retry limit", + group->kctx->tgid, group->kctx->id, group->handle); + kbase_csf_add_group_fatal_error(group, &err_payload); + kbase_event_wakeup(group->kctx); + } + + return group->csg_reg_bind_retries >= MCU_SHARED_REGS_BIND_ATTEMPT_LIMIT; +} + +/* Replace the given phys at vpfn (reflecting a queue's userio_pages) mapping. + * If phys is NULL, the internal dummy_phys is used, which effectively + * restores back to the initialized state for the given queue's userio_pages + * (i.e. mapped to the default dummy page). + * In case of CSF mmu update error on a queue, the dummy phy is used to restore + * back the default 'unbound' (i.e. mapped to dummy) condition. + * + * It's the caller's responsibility to ensure that the given vpfn is extracted + * correctly from a CSG_REG object, for example, using CSG_REG_USERIO_VPFN(). 
+ */ +static int userio_pages_replace_phys(struct kbase_device *kbdev, u64 vpfn, struct tagged_addr *phys) +{ + struct kbase_csf_mcu_shared_regions *shared_regs = &kbdev->csf.scheduler.mcu_regs_data; + int err = 0, err1; + + lockdep_assert_held(&kbdev->csf.scheduler.lock); + + if (phys) { + unsigned long mem_flags_input = shared_regs->userio_mem_rd_flags; + unsigned long mem_flags_output = mem_flags_input | KBASE_REG_GPU_WR; + + /* Dealing with a queue's INPUT page */ + err = kbase_mmu_update_csf_mcu_pages(kbdev, vpfn, &phys[0], 1, mem_flags_input, + KBASE_MEM_GROUP_CSF_IO); + /* Dealing with a queue's OUTPUT page */ + err1 = kbase_mmu_update_csf_mcu_pages(kbdev, vpfn + 1, &phys[1], 1, + mem_flags_output, KBASE_MEM_GROUP_CSF_IO); + if (unlikely(err1)) + err = err1; + } + + if (unlikely(err) || !phys) { + /* Restore back to dummy_userio_phy */ + update_mapping_with_dummy_pages(kbdev, vpfn, KBASEP_NUM_CS_USER_IO_PAGES); + } + + return err; +} + +/* Update a group's queues' mappings for a group with its runtime bound group region */ +static int csg_reg_update_on_csis(struct kbase_device *kbdev, struct kbase_queue_group *group, + struct kbase_queue_group *prev_grp) +{ + struct kbase_csg_shared_region *csg_reg = get_group_bound_csg_reg(group); + const u32 nr_susp_pages = PFN_UP(kbdev->csf.global_iface.groups[0].suspend_size); + const u32 nr_csis = kbdev->csf.global_iface.groups[0].stream_num; + struct tagged_addr *phy; + int err = 0, err1; + u32 i; + + lockdep_assert_held(&kbdev->csf.scheduler.lock); + + if (WARN_ONCE(!csg_reg, "Update_userio pages: group has no bound csg_reg")) + return -EINVAL; + + for (i = 0; i < nr_csis; i++) { + struct kbase_queue *queue = group->bound_queues[i]; + struct kbase_queue *prev_queue = prev_grp ? prev_grp->bound_queues[i] : NULL; + + /* Set the phy if the group's queue[i] needs mapping, otherwise NULL */ + phy = (queue && queue->enabled && !queue->user_io_gpu_va) ? 
queue->phys : NULL; + + /* Either phy is valid, or this update is for a transition change from + * prev_group, and the prev_queue was mapped, so an update is required. + */ + if (phy || (prev_queue && prev_queue->user_io_gpu_va)) { + u64 vpfn = CSG_REG_USERIO_VPFN(csg_reg->reg, i, nr_susp_pages); + + err1 = userio_pages_replace_phys(kbdev, vpfn, phy); + + if (unlikely(err1)) { + dev_warn(kbdev->dev, + "%s: Error in update queue-%d mapping for csg_%d_%d_%d", + __func__, i, group->kctx->tgid, group->kctx->id, + group->handle); + err = err1; + } else if (phy) + queue->user_io_gpu_va = GET_VPFN_VA(vpfn); + + /* Mark prev_group's queue has lost its mapping */ + if (prev_queue) + prev_queue->user_io_gpu_va = 0; + } + } + + return err; +} + +/* Bind a group to a given csg_reg, any previous mappings with the csg_reg are replaced + * with the given group's phy pages, or, if no replacement, the default dummy pages. + * Note, the csg_reg's fields are in transition step-by-step from the prev_grp to its + * new binding owner in this function. At the end, the prev_grp would be completely + * detached away from the previously bound csg_reg. 
+ */ +static int group_bind_csg_reg(struct kbase_device *kbdev, struct kbase_queue_group *group, + struct kbase_csg_shared_region *csg_reg) +{ + const unsigned long mem_flags = SUSP_PAGE_MAP_FLAGS; + const u32 nr_susp_pages = PFN_UP(kbdev->csf.global_iface.groups[0].suspend_size); + struct kbase_queue_group *prev_grp = csg_reg->grp; + struct kbase_va_region *reg = csg_reg->reg; + struct tagged_addr *phy; + int err = 0, err1; + + lockdep_assert_held(&kbdev->csf.scheduler.lock); + + /* The csg_reg is expected still on the unused list so its link is not empty */ + if (WARN_ON_ONCE(list_empty(&csg_reg->link))) { + dev_dbg(kbdev->dev, "csg_reg is marked in active use"); + return -EINVAL; + } + + if (WARN_ON_ONCE(prev_grp && prev_grp->csg_reg != csg_reg)) { + dev_dbg(kbdev->dev, "Unexpected bound lost on prev_group"); + prev_grp->csg_reg = NULL; + return -EINVAL; + } + + /* Replacing the csg_reg bound group to the newly given one */ + csg_reg->grp = group; + group->csg_reg = csg_reg; + + /* Resolving mappings, deal with protected mode first */ + if (group->protected_suspend_buf.pma) { + /* We are binding a new group with P-mode ready, the prev_grp's P-mode mapping + * status is now stale during this transition of ownership. For the new owner, + * its mapping would have been updated away when it lost its binding previously. + * So it needs an update to this pma map. By clearing here the mapped flag + * ensures it reflects the new owner's condition. 
+ */ + csg_reg->pmode_mapped = false; + err = kbase_csf_mcu_shared_group_update_pmode_map(kbdev, group); + } else if (csg_reg->pmode_mapped) { + /* Need to unmap the previous one, use the dummy pages */ + err = update_mapping_with_dummy_pages( + kbdev, CSG_REG_PMOD_BUF_VPFN(reg, nr_susp_pages), nr_susp_pages); + + if (unlikely(err)) + dev_warn(kbdev->dev, "%s: Failed to update P-mode dummy for csg_%d_%d_%d", + __func__, group->kctx->tgid, group->kctx->id, group->handle); + + csg_reg->pmode_mapped = false; + } + + /* Unlike the normal suspend buf, the mapping of the protected mode suspend buffer is + * actually reflected by a specific mapped flag (due to phys[] is only allocated on + * in-need basis). So the GPU_VA is always updated to the bound region's corresponding + * VA, as a reflection of the binding to the csg_reg. + */ + group->protected_suspend_buf.gpu_va = + GET_VPFN_VA(CSG_REG_PMOD_BUF_VPFN(reg, nr_susp_pages)); + + /* Deal with normal mode suspend buffer */ + phy = group->normal_suspend_buf.phy; + err1 = kbase_mmu_update_csf_mcu_pages(kbdev, CSG_REG_SUSP_BUF_VPFN(reg, nr_susp_pages), phy, + nr_susp_pages, mem_flags, KBASE_MEM_GROUP_CSF_FW); + + if (unlikely(err1)) { + dev_warn(kbdev->dev, "%s: Failed to update suspend buffer for csg_%d_%d_%d", + __func__, group->kctx->tgid, group->kctx->id, group->handle); + + /* Attempt a restore to default dummy for removing previous mapping */ + if (prev_grp) + update_mapping_with_dummy_pages( + kbdev, CSG_REG_SUSP_BUF_VPFN(reg, nr_susp_pages), nr_susp_pages); + err = err1; + /* Marking the normal suspend buffer is not mapped (due to error) */ + group->normal_suspend_buf.gpu_va = 0; + } else { + /* Marking the normal suspend buffer is actually mapped */ + group->normal_suspend_buf.gpu_va = + GET_VPFN_VA(CSG_REG_SUSP_BUF_VPFN(reg, nr_susp_pages)); + } + + /* Deal with queue userio_pages; only override err when this step itself fails */ + err1 = csg_reg_update_on_csis(kbdev, group, prev_grp); + if (unlikely(err1)) + err = err1; + + /* Reset the previous group's
suspend buffers' GPU_VAs as it has lost its bound */ + if (prev_grp) { + prev_grp->normal_suspend_buf.gpu_va = 0; + prev_grp->protected_suspend_buf.gpu_va = 0; + prev_grp->csg_reg = NULL; + } + + return err; +} + +/* Notify the group is placed on-slot, hence the bound csg_reg is active in use */ +void kbase_csf_mcu_shared_set_group_csg_reg_active(struct kbase_device *kbdev, + struct kbase_queue_group *group) +{ + struct kbase_csg_shared_region *csg_reg = get_group_bound_csg_reg(group); + + lockdep_assert_held(&kbdev->csf.scheduler.lock); + + if (WARN_ONCE(!csg_reg || csg_reg->grp != group, "Group_%d_%d_%d has no csg_reg bounding", + group->kctx->tgid, group->kctx->id, group->handle)) + return; + + /* By dropping out the csg_reg from the unused list, it becomes active and is tracked + * by its bound group that is on-slot. The design is that, when this on-slot group is + * moved to off-slot, the scheduler slot-clean up will add it back to the tail of the + * unused list. + */ + if (!WARN_ON_ONCE(list_empty(&csg_reg->link))) + list_del_init(&csg_reg->link); +} + +/* Notify the group is placed off-slot, hence the bound csg_reg is not in active use + * anymore. Existing bounding/mappings are left untouched. These would only be dealt with + * if the bound csg_reg is to be reused with another group. + */ +void kbase_csf_mcu_shared_set_group_csg_reg_unused(struct kbase_device *kbdev, + struct kbase_queue_group *group) +{ + struct kbase_csg_shared_region *csg_reg = get_group_bound_csg_reg(group); + struct kbase_csf_mcu_shared_regions *shared_regs = &kbdev->csf.scheduler.mcu_regs_data; + + lockdep_assert_held(&kbdev->csf.scheduler.lock); + + if (WARN_ONCE(!csg_reg || csg_reg->grp != group, "Group_%d_%d_%d has no csg_reg bound", + group->kctx->tgid, group->kctx->id, group->handle)) + return; + + /* By adding back the csg_reg to the unused list, it becomes available for another + * group to break its existing binding and set up a new one. 
+ */ + if (!list_empty(&csg_reg->link)) { + WARN_ONCE(group->csg_nr >= 0, "Group is assumed vacated from slot"); + list_move_tail(&csg_reg->link, &shared_regs->unused_csg_regs); + } else + list_add_tail(&csg_reg->link, &shared_regs->unused_csg_regs); +} + +/* Adding a new queue to an existing on-slot group */ +int kbase_csf_mcu_shared_add_queue(struct kbase_device *kbdev, struct kbase_queue *queue) +{ + struct kbase_queue_group *group = queue->group; + struct kbase_csg_shared_region *csg_reg; + const u32 nr_susp_pages = PFN_UP(kbdev->csf.global_iface.groups[0].suspend_size); + u64 vpfn; + int err; + + lockdep_assert_held(&kbdev->csf.scheduler.lock); + + if (WARN_ONCE(!group || group->csg_nr < 0, "No bound group, or group is not on-slot")) + return -EIO; + + csg_reg = get_group_bound_csg_reg(group); + if (WARN_ONCE(!csg_reg || !list_empty(&csg_reg->link), + "No bound csg_reg, or in wrong state")) + return -EIO; + + vpfn = CSG_REG_USERIO_VPFN(csg_reg->reg, queue->csi_index, nr_susp_pages); + err = userio_pages_replace_phys(kbdev, vpfn, queue->phys); + if (likely(!err)) { + /* Mark the queue has been successfully mapped */ + queue->user_io_gpu_va = GET_VPFN_VA(vpfn); + } else { + /* Mark the queue has no mapping on its phys[] */ + queue->user_io_gpu_va = 0; + dev_dbg(kbdev->dev, + "%s: Error in mapping userio pages for queue-%d of csg_%d_%d_%d", __func__, + queue->csi_index, group->kctx->tgid, group->kctx->id, group->handle); + + /* notify the error for the bound group */ + if (notify_group_csg_reg_map_error(group)) + err = -EIO; + } + + return err; +} + +/* Unmap a given queue's userio pages, when the queue is deleted */ +void kbase_csf_mcu_shared_drop_stopped_queue(struct kbase_device *kbdev, struct kbase_queue *queue) +{ + struct kbase_queue_group *group; + struct kbase_csg_shared_region *csg_reg; + const u32 nr_susp_pages = PFN_UP(kbdev->csf.global_iface.groups[0].suspend_size); + u64 vpfn; + + lockdep_assert_held(&kbdev->csf.scheduler.lock); + + /* The queue has 
no existing mapping, nothing to do */ + if (!queue || !queue->user_io_gpu_va) + return; + + group = queue->group; + if (WARN_ONCE(!group || !group->csg_reg, "Queue/Group has no bound region")) + return; + + csg_reg = get_group_bound_csg_reg(group); + + vpfn = CSG_REG_USERIO_VPFN(csg_reg->reg, queue->csi_index, nr_susp_pages); + + WARN_ONCE(userio_pages_replace_phys(kbdev, vpfn, NULL), + "Unexpected restoring to dummy map update error"); + queue->user_io_gpu_va = 0; +} + +int kbase_csf_mcu_shared_group_update_pmode_map(struct kbase_device *kbdev, + struct kbase_queue_group *group) +{ + struct kbase_csf_mcu_shared_regions *shared_regs = &kbdev->csf.scheduler.mcu_regs_data; + struct kbase_csg_shared_region *csg_reg = get_group_bound_csg_reg(group); + const u32 nr_susp_pages = PFN_UP(kbdev->csf.global_iface.groups[0].suspend_size); + int err = 0, err1; + + lockdep_assert_held(&kbdev->csf.scheduler.lock); + + if (WARN_ONCE(!csg_reg, "Update_pmode_map: the bound csg_reg can't be NULL")) + return -EINVAL; + + /* If the pmode already mapped, nothing to do */ + if (csg_reg->pmode_mapped) + return 0; + + /* P-mode map not in place and the group has allocated P-mode pages, map it */ + if (group->protected_suspend_buf.pma) { + unsigned long mem_flags = SUSP_PAGE_MAP_FLAGS; + struct tagged_addr *phy = shared_regs->pma_phys; + struct kbase_va_region *reg = csg_reg->reg; + u64 vpfn = CSG_REG_PMOD_BUF_VPFN(reg, nr_susp_pages); + u32 i; + + /* Populate the protected phys from pma to phy[] */ + for (i = 0; i < nr_susp_pages; i++) + phy[i] = as_tagged(group->protected_suspend_buf.pma[i]->pa); + + /* Add the P-mode suspend buffer mapping */ + err = kbase_mmu_update_csf_mcu_pages(kbdev, vpfn, phy, nr_susp_pages, mem_flags, + KBASE_MEM_GROUP_CSF_FW); + + /* If error, restore to default dummpy */ + if (unlikely(err)) { + err1 = update_mapping_with_dummy_pages(kbdev, vpfn, nr_susp_pages); + if (unlikely(err1)) + dev_warn( + kbdev->dev, + "%s: Failed in recovering to P-mode dummy for 
csg_%d_%d_%d", + __func__, group->kctx->tgid, group->kctx->id, + group->handle); + + csg_reg->pmode_mapped = false; + } else + csg_reg->pmode_mapped = true; + } + + return err; +} + +void kbase_csf_mcu_shared_clear_evicted_group_csg_reg(struct kbase_device *kbdev, + struct kbase_queue_group *group) +{ + struct kbase_csf_mcu_shared_regions *shared_regs = &kbdev->csf.scheduler.mcu_regs_data; + struct kbase_csg_shared_region *csg_reg = get_group_bound_csg_reg(group); + struct kbase_va_region *reg; + const u32 nr_susp_pages = PFN_UP(kbdev->csf.global_iface.groups[0].suspend_size); + u32 nr_csis = kbdev->csf.global_iface.groups[0].stream_num; + int err = 0; + u32 i; + + lockdep_assert_held(&kbdev->csf.scheduler.lock); + + /* Nothing to do for clearing up if no bound csg_reg */ + if (!csg_reg) + return; + + reg = csg_reg->reg; + /* Restore mappings default dummy pages for any mapped pages */ + if (csg_reg->pmode_mapped) { + err = update_mapping_with_dummy_pages( + kbdev, CSG_REG_PMOD_BUF_VPFN(reg, nr_susp_pages), nr_susp_pages); + WARN_ONCE(unlikely(err), "Restore dummy failed for clearing pmod buffer mapping"); + + csg_reg->pmode_mapped = false; + } + + if (group->normal_suspend_buf.gpu_va) { + err = update_mapping_with_dummy_pages( + kbdev, CSG_REG_SUSP_BUF_VPFN(reg, nr_susp_pages), nr_susp_pages); + WARN_ONCE(err, "Restore dummy failed for clearing suspend buffer mapping"); + } + + /* Deal with queue uerio pages */ + for (i = 0; i < nr_csis; i++) + kbase_csf_mcu_shared_drop_stopped_queue(kbdev, group->bound_queues[i]); + + group->normal_suspend_buf.gpu_va = 0; + group->protected_suspend_buf.gpu_va = 0; + + /* Break the binding */ + group->csg_reg = NULL; + csg_reg->grp = NULL; + + /* Put the csg_reg to the front of the unused list */ + if (WARN_ON_ONCE(list_empty(&csg_reg->link))) + list_add(&csg_reg->link, &shared_regs->unused_csg_regs); + else + list_move(&csg_reg->link, &shared_regs->unused_csg_regs); +} + +int kbase_csf_mcu_shared_group_bind_csg_reg(struct 
kbase_device *kbdev, + struct kbase_queue_group *group) +{ + struct kbase_csf_mcu_shared_regions *shared_regs = &kbdev->csf.scheduler.mcu_regs_data; + struct kbase_csg_shared_region *csg_reg; + int err; + + lockdep_assert_held(&kbdev->csf.scheduler.lock); + + csg_reg = get_group_bound_csg_reg(group); + if (!csg_reg) + csg_reg = list_first_entry_or_null(&shared_regs->unused_csg_regs, + struct kbase_csg_shared_region, link); + + if (!WARN_ON_ONCE(!csg_reg)) { + struct kbase_queue_group *prev_grp = csg_reg->grp; + + /* Deal with the previous binding and lazy unmap, i.e if the previous mapping not + * the required one, unmap it. + */ + if (prev_grp == group) { + /* Update existing bindings, if there have been some changes */ + err = kbase_csf_mcu_shared_group_update_pmode_map(kbdev, group); + if (likely(!err)) + err = csg_reg_update_on_csis(kbdev, group, NULL); + } else + err = group_bind_csg_reg(kbdev, group, csg_reg); + } else { + /* This should not have been possible if the code operates rightly */ + dev_err(kbdev->dev, "%s: Unexpected NULL csg_reg for group %d of context %d_%d", + __func__, group->handle, group->kctx->tgid, group->kctx->id); + return -EIO; + } + + if (likely(!err)) + notify_group_csg_reg_map_done(group); + else + notify_group_csg_reg_map_error(group); + + return err; +} + +static int shared_mcu_csg_reg_init(struct kbase_device *kbdev, + struct kbase_csg_shared_region *csg_reg) +{ + struct kbase_csf_mcu_shared_regions *shared_regs = &kbdev->csf.scheduler.mcu_regs_data; + const u32 nr_susp_pages = PFN_UP(kbdev->csf.global_iface.groups[0].suspend_size); + u32 nr_csis = kbdev->csf.global_iface.groups[0].stream_num; + const size_t nr_csg_reg_pages = 2 * (nr_susp_pages + nr_csis); + struct kbase_va_region *reg; + u64 vpfn; + int err, i; + + INIT_LIST_HEAD(&csg_reg->link); + reg = kbase_alloc_free_region(&kbdev->csf.shared_reg_rbtree, 0, nr_csg_reg_pages, + KBASE_REG_ZONE_MCU_SHARED); + + if (!reg) { + dev_err(kbdev->dev, "%s: Failed to allocate a MCU 
shared region for %zu pages\n", + __func__, nr_csg_reg_pages); + return -ENOMEM; + } + + /* Insert the region into rbtree, so it becomes ready to use */ + mutex_lock(&kbdev->csf.reg_lock); + err = kbase_add_va_region_rbtree(kbdev, reg, 0, nr_csg_reg_pages, 1); + reg->flags &= ~KBASE_REG_FREE; + mutex_unlock(&kbdev->csf.reg_lock); + if (err) { + kfree(reg); + dev_err(kbdev->dev, "%s: Failed to add a region of %zu pages into rbtree", __func__, + nr_csg_reg_pages); + return err; + } + + /* Initialize the mappings so MMU only need to update the the corresponding + * mapped phy-pages at runtime. + * Map the normal suspend buffer pages to the prepared dummy phys[]. + */ + vpfn = CSG_REG_SUSP_BUF_VPFN(reg, nr_susp_pages); + err = insert_dummy_pages(kbdev, vpfn, nr_susp_pages); + + if (unlikely(err)) + goto fail_susp_map_fail; + + /* Map the protected suspend buffer pages to the prepared dummy phys[] */ + vpfn = CSG_REG_PMOD_BUF_VPFN(reg, nr_susp_pages); + err = insert_dummy_pages(kbdev, vpfn, nr_susp_pages); + + if (unlikely(err)) + goto fail_pmod_map_fail; + + for (i = 0; i < nr_csis; i++) { + vpfn = CSG_REG_USERIO_VPFN(reg, i, nr_susp_pages); + err = insert_dummy_pages(kbdev, vpfn, KBASEP_NUM_CS_USER_IO_PAGES); + + if (unlikely(err)) + goto fail_userio_pages_map_fail; + } + + /* Replace the previous NULL-valued field with the successully initialized reg */ + csg_reg->reg = reg; + + return 0; + +fail_userio_pages_map_fail: + while (i-- > 0) { + vpfn = CSG_REG_USERIO_VPFN(reg, i, nr_susp_pages); + kbase_mmu_teardown_pages(kbdev, &kbdev->csf.mcu_mmu, vpfn, shared_regs->dummy_phys, + KBASEP_NUM_CS_USER_IO_PAGES, MCU_AS_NR, true); + } + + vpfn = CSG_REG_PMOD_BUF_VPFN(reg, nr_susp_pages); + kbase_mmu_teardown_pages(kbdev, &kbdev->csf.mcu_mmu, vpfn, shared_regs->dummy_phys, + nr_susp_pages, MCU_AS_NR, true); +fail_pmod_map_fail: + vpfn = CSG_REG_SUSP_BUF_VPFN(reg, nr_susp_pages); + kbase_mmu_teardown_pages(kbdev, &kbdev->csf.mcu_mmu, vpfn, shared_regs->dummy_phys, + 
nr_susp_pages, MCU_AS_NR, true); +fail_susp_map_fail: + mutex_lock(&kbdev->csf.reg_lock); + kbase_remove_va_region(kbdev, reg); + mutex_unlock(&kbdev->csf.reg_lock); + kfree(reg); + + return err; +} + +/* Note, this helper can only be called on scheduler shutdown */ +static void shared_mcu_csg_reg_term(struct kbase_device *kbdev, + struct kbase_csg_shared_region *csg_reg) +{ + struct kbase_csf_mcu_shared_regions *shared_regs = &kbdev->csf.scheduler.mcu_regs_data; + struct kbase_va_region *reg = csg_reg->reg; + const u32 nr_susp_pages = PFN_UP(kbdev->csf.global_iface.groups[0].suspend_size); + const u32 nr_csis = kbdev->csf.global_iface.groups[0].stream_num; + u64 vpfn; + int i; + + for (i = 0; i < nr_csis; i++) { + vpfn = CSG_REG_USERIO_VPFN(reg, i, nr_susp_pages); + kbase_mmu_teardown_pages(kbdev, &kbdev->csf.mcu_mmu, vpfn, shared_regs->dummy_phys, + KBASEP_NUM_CS_USER_IO_PAGES, MCU_AS_NR, true); + } + + vpfn = CSG_REG_PMOD_BUF_VPFN(reg, nr_susp_pages); + kbase_mmu_teardown_pages(kbdev, &kbdev->csf.mcu_mmu, vpfn, shared_regs->dummy_phys, + nr_susp_pages, MCU_AS_NR, true); + vpfn = CSG_REG_SUSP_BUF_VPFN(reg, nr_susp_pages); + kbase_mmu_teardown_pages(kbdev, &kbdev->csf.mcu_mmu, vpfn, shared_regs->dummy_phys, + nr_susp_pages, MCU_AS_NR, true); + + mutex_lock(&kbdev->csf.reg_lock); + kbase_remove_va_region(kbdev, reg); + mutex_unlock(&kbdev->csf.reg_lock); + kfree(reg); +} + +int kbase_csf_mcu_shared_regs_data_init(struct kbase_device *kbdev) +{ + struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler; + struct kbase_csf_mcu_shared_regions *shared_regs = &scheduler->mcu_regs_data; + struct kbase_csg_shared_region *array_csg_regs; + const size_t nr_susp_pages = PFN_UP(kbdev->csf.global_iface.groups[0].suspend_size); + const u32 nr_groups = kbdev->csf.global_iface.group_num; + const u32 nr_csg_regs = MCU_SHARED_REGS_PREALLOCATE_SCALE * nr_groups; + const u32 nr_dummy_phys = MAX(nr_susp_pages, KBASEP_NUM_CS_USER_IO_PAGES); + u32 i; + int err; + + 
shared_regs->userio_mem_rd_flags = get_userio_mmu_flags(kbdev); + INIT_LIST_HEAD(&shared_regs->unused_csg_regs); + + shared_regs->dummy_phys = + kcalloc(nr_dummy_phys, sizeof(*shared_regs->dummy_phys), GFP_KERNEL); + if (!shared_regs->dummy_phys) + return -ENOMEM; + + if (kbase_mem_pool_alloc_pages(&kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW], 1, + &shared_regs->dummy_phys[0], false) <= 0) + return -ENOMEM; + + shared_regs->dummy_phys_allocated = true; + set_page_meta_status_not_movable(shared_regs->dummy_phys[0]); + + /* Replicate the allocated single shared_regs->dummy_phys[0] to the full array */ + for (i = 1; i < nr_dummy_phys; i++) + shared_regs->dummy_phys[i] = shared_regs->dummy_phys[0]; + + shared_regs->pma_phys = kcalloc(nr_susp_pages, sizeof(*shared_regs->pma_phys), GFP_KERNEL); + if (!shared_regs->pma_phys) + return -ENOMEM; + + array_csg_regs = kcalloc(nr_csg_regs, sizeof(*array_csg_regs), GFP_KERNEL); + if (!array_csg_regs) + return -ENOMEM; + shared_regs->array_csg_regs = array_csg_regs; + + /* All fields in scheduler->mcu_regs_data except the shared_regs->array_csg_regs + * are properly populated and ready to use. 
Now initialize the items in + * shared_regs->array_csg_regs[] + */ + for (i = 0; i < nr_csg_regs; i++) { + err = shared_mcu_csg_reg_init(kbdev, &array_csg_regs[i]); + if (err) + return err; + + list_add_tail(&array_csg_regs[i].link, &shared_regs->unused_csg_regs); + } + + return 0; +} + +void kbase_csf_mcu_shared_regs_data_term(struct kbase_device *kbdev) +{ + struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler; + struct kbase_csf_mcu_shared_regions *shared_regs = &scheduler->mcu_regs_data; + struct kbase_csg_shared_region *array_csg_regs = + (struct kbase_csg_shared_region *)shared_regs->array_csg_regs; + const u32 nr_groups = kbdev->csf.global_iface.group_num; + const u32 nr_csg_regs = MCU_SHARED_REGS_PREALLOCATE_SCALE * nr_groups; + + if (array_csg_regs) { + struct kbase_csg_shared_region *csg_reg; + u32 i, cnt_csg_regs = 0; + + for (i = 0; i < nr_csg_regs; i++) { + csg_reg = &array_csg_regs[i]; + /* There should not be any group mapping bindings */ + WARN_ONCE(csg_reg->grp, "csg_reg has a bound group"); + + if (csg_reg->reg) { + shared_mcu_csg_reg_term(kbdev, csg_reg); + cnt_csg_regs++; + } + } + + /* The nr_susp_regs counts should match the array_csg_regs' length */ + list_for_each_entry(csg_reg, &shared_regs->unused_csg_regs, link) + cnt_csg_regs--; + + WARN_ONCE(cnt_csg_regs, "Unmatched counts of susp_regs"); + kfree(shared_regs->array_csg_regs); + } + + if (shared_regs->dummy_phys_allocated) { + struct page *page = as_page(shared_regs->dummy_phys[0]); + + kbase_mem_pool_free(&kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW], page, false); + } + + kfree(shared_regs->dummy_phys); + kfree(shared_regs->pma_phys); +} diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_mcu_shared_reg.h b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_mcu_shared_reg.h new file mode 100644 index 000000000000..61943cbbf083 --- /dev/null +++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_mcu_shared_reg.h @@ -0,0 +1,139 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH 
Linux-syscall-note */ +/* + * + * (C) COPYRIGHT 2022 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + */ + +#ifndef _KBASE_CSF_MCU_SHARED_REG_H_ +#define _KBASE_CSF_MCU_SHARED_REG_H_ + +/** + * kbase_csf_mcu_shared_set_group_csg_reg_active - Notify that the group is active on-slot with + * scheduling action. Essential runtime resources + * are bound with the group for it to run + * + * @kbdev: Instance of a GPU platform device that implements a CSF interface. + * @group: Pointer to the group that is placed into active on-slot running by the scheduler. + * + */ +void kbase_csf_mcu_shared_set_group_csg_reg_active(struct kbase_device *kbdev, + struct kbase_queue_group *group); + +/** + * kbase_csf_mcu_shared_set_group_csg_reg_unused - Notify that the group is placed off-slot with + * scheduling action. Some of bound runtime + * resources can be reallocated for others to use + * + * @kbdev: Instance of a GPU platform device that implements a CSF interface. + * @group: Pointer to the group that is placed off-slot by the scheduler. 
+ * + */ +void kbase_csf_mcu_shared_set_group_csg_reg_unused(struct kbase_device *kbdev, + struct kbase_queue_group *group); + +/** + * kbase_csf_mcu_shared_group_update_pmode_map - Request to update the given group's protected + * suspend buffer pages to be mapped for supporting + * protected mode operations. + * + * @kbdev: Instance of a GPU platform device that implements a CSF interface. + * @group: Pointer to the group for attempting a protected mode suspend buffer binding/mapping. + * + * Return: 0 for success, the group has a protected suspend buffer region mapped. Otherwise an + * error code is returned. + */ +int kbase_csf_mcu_shared_group_update_pmode_map(struct kbase_device *kbdev, + struct kbase_queue_group *group); + +/** + * kbase_csf_mcu_shared_clear_evicted_group_csg_reg - Clear any bound regions/mappings as the + * given group is evicted out of the runtime + * operations. + * + * @kbdev: Instance of a GPU platform device that implements a CSF interface. + * @group: Pointer to the group that has been evicted out of set of operational groups. + * + * This function will taken away any of the bindings/mappings immediately so the resources + * are not tied up to the given group, which has been evicted out of scheduling action for + * termination. + */ +void kbase_csf_mcu_shared_clear_evicted_group_csg_reg(struct kbase_device *kbdev, + struct kbase_queue_group *group); + +/** + * kbase_csf_mcu_shared_add_queue - Request to add a newly activated queue for a group to be + * run on slot. + * + * @kbdev: Instance of a GPU platform device that implements a CSF interface. + * @queue: Pointer to the queue that requires some runtime resource to be bound for joining + * others that are already running on-slot with their bound group. + * + * Return: 0 on success, or negative on failure. 
+ */ +int kbase_csf_mcu_shared_add_queue(struct kbase_device *kbdev, struct kbase_queue *queue); + +/** + * kbase_csf_mcu_shared_drop_stopped_queue - Request to drop a queue after it has been stopped + * from its operational state from a group. + * + * @kbdev: Instance of a GPU platform device that implements a CSF interface. + * @queue: Pointer to the queue that has been stopped from operational state. + * + */ +void kbase_csf_mcu_shared_drop_stopped_queue(struct kbase_device *kbdev, struct kbase_queue *queue); + +/** + * kbase_csf_mcu_shared_group_bind_csg_reg - Bind some required runtime resources to the given + * group for ready to run on-slot. + * + * @kbdev: Instance of a GPU platform device that implements a CSF interface. + * @group: Pointer to the queue group that requires the runtime resources. + * + * This function binds/maps the required suspend buffer pages and userio pages for the given + * group, readying it to run on-slot. + * + * Return: 0 on success, or negative on failure. + */ +int kbase_csf_mcu_shared_group_bind_csg_reg(struct kbase_device *kbdev, + struct kbase_queue_group *group); + +/** + * kbase_csf_mcu_shared_regs_data_init - Allocate and initialize the MCU shared regions data for + * the given device. + * + * @kbdev: Instance of a GPU platform device that implements a CSF interface. + * + * This function allocate and initialize the MCU shared VA regions for runtime operations + * of the CSF scheduler. + * + * Return: 0 on success, or an error code. + */ +int kbase_csf_mcu_shared_regs_data_init(struct kbase_device *kbdev); + +/** + * kbase_csf_mcu_shared_regs_data_term - Terminate the allocated MCU shared regions data for + * the given device. + * + * @kbdev: Instance of a GPU platform device that implements a CSF interface. + * + * This function terminates the MCU shared VA regions allocated for runtime operations + * of the CSF scheduler. 
+ */ +void kbase_csf_mcu_shared_regs_data_term(struct kbase_device *kbdev); + +#endif /* _KBASE_CSF_MCU_SHARED_REG_H_ */ diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_registers.h b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_registers.h index 6dde56cb161a..82389e5bf2a3 100644 --- a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_registers.h +++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_registers.h @@ -229,22 +229,44 @@ #define GLB_PRFCNT_TILER_EN 0x0058 /* () Performance counter enable for tiler */ #define GLB_PRFCNT_MMU_L2_EN 0x005C /* () Performance counter enable for MMU/L2 cache */ -#define GLB_DEBUG_FWUTF_DESTROY 0x0FE0 /* () Test fixture destroy function address */ -#define GLB_DEBUG_FWUTF_TEST 0x0FE4 /* () Test index */ -#define GLB_DEBUG_FWUTF_FIXTURE 0x0FE8 /* () Test fixture index */ -#define GLB_DEBUG_FWUTF_CREATE 0x0FEC /* () Test fixture create function address */ +#define GLB_DEBUG_ARG_IN0 0x0FE0 /* Firmware Debug argument array element 0 */ +#define GLB_DEBUG_ARG_IN1 0x0FE4 /* Firmware Debug argument array element 1 */ +#define GLB_DEBUG_ARG_IN2 0x0FE8 /* Firmware Debug argument array element 2 */ +#define GLB_DEBUG_ARG_IN3 0x0FEC /* Firmware Debug argument array element 3 */ + +/* Mappings based on GLB_DEBUG_REQ.FWUTF_RUN bit being different from GLB_DEBUG_ACK.FWUTF_RUN */ +#define GLB_DEBUG_FWUTF_DESTROY GLB_DEBUG_ARG_IN0 /* () Test fixture destroy function address */ +#define GLB_DEBUG_FWUTF_TEST GLB_DEBUG_ARG_IN1 /* () Test index */ +#define GLB_DEBUG_FWUTF_FIXTURE GLB_DEBUG_ARG_IN2 /* () Test fixture index */ +#define GLB_DEBUG_FWUTF_CREATE GLB_DEBUG_ARG_IN3 /* () Test fixture create function address */ + #define GLB_DEBUG_ACK_IRQ_MASK 0x0FF8 /* () Global debug acknowledge interrupt mask */ #define GLB_DEBUG_REQ 0x0FFC /* () Global debug request */ /* GLB_OUTPUT_BLOCK register offsets */ +#define GLB_DEBUG_ARG_OUT0 0x0FE0 /* Firmware debug result element 0 */ +#define GLB_DEBUG_ARG_OUT1 0x0FE4 /* Firmware debug result element 1 */ 
+#define GLB_DEBUG_ARG_OUT2 0x0FE8 /* Firmware debug result element 2 */ +#define GLB_DEBUG_ARG_OUT3 0x0FEC /* Firmware debug result element 3 */ + #define GLB_ACK 0x0000 /* () Global acknowledge */ #define GLB_DB_ACK 0x0008 /* () Global doorbell acknowledge */ #define GLB_HALT_STATUS 0x0010 /* () Global halt status */ #define GLB_PRFCNT_STATUS 0x0014 /* () Performance counter status */ #define GLB_PRFCNT_INSERT 0x0018 /* () Performance counter buffer insert index */ -#define GLB_DEBUG_FWUTF_RESULT 0x0FE0 /* () Firmware debug test result */ +#define GLB_DEBUG_FWUTF_RESULT GLB_DEBUG_ARG_OUT0 /* () Firmware debug test result */ #define GLB_DEBUG_ACK 0x0FFC /* () Global debug acknowledge */ +#ifdef CONFIG_MALI_CORESIGHT +#define GLB_DEBUG_REQ_FW_AS_WRITE_SHIFT 4 +#define GLB_DEBUG_REQ_FW_AS_WRITE_MASK (0x1 << GLB_DEBUG_REQ_FW_AS_WRITE_SHIFT) +#define GLB_DEBUG_REQ_FW_AS_READ_SHIFT 5 +#define GLB_DEBUG_REQ_FW_AS_READ_MASK (0x1 << GLB_DEBUG_REQ_FW_AS_READ_SHIFT) +#define GLB_DEBUG_ARG_IN0 0x0FE0 +#define GLB_DEBUG_ARG_IN1 0x0FE4 +#define GLB_DEBUG_ARG_OUT0 0x0FE0 +#endif /* CONFIG_MALI_CORESIGHT */ + /* USER register offsets */ #define LATEST_FLUSH 0x0000 /* () Flush ID of latest clean-and-invalidate operation */ @@ -304,10 +326,17 @@ #define CS_REQ_IDLE_RESOURCE_REQ_SHIFT 11 #define CS_REQ_IDLE_RESOURCE_REQ_MASK (0x1 << CS_REQ_IDLE_RESOURCE_REQ_SHIFT) #define CS_REQ_IDLE_RESOURCE_REQ_GET(reg_val) \ - (((reg_val)&CS_REQ_IDLE_RESOURCE_REQ_MASK) >> CS_REQ_IDLE_RESOURCE_REQ_SHIFT) + (((reg_val) & CS_REQ_IDLE_RESOURCE_REQ_MASK) >> CS_REQ_IDLE_RESOURCE_REQ_SHIFT) #define CS_REQ_IDLE_RESOURCE_REQ_SET(reg_val, value) \ (((reg_val) & ~CS_REQ_IDLE_RESOURCE_REQ_MASK) | \ (((value) << CS_REQ_IDLE_RESOURCE_REQ_SHIFT) & CS_REQ_IDLE_RESOURCE_REQ_MASK)) +#define CS_REQ_IDLE_SHARED_SB_DEC_SHIFT 12 +#define CS_REQ_IDLE_SHARED_SB_DEC_MASK (0x1 << CS_REQ_IDLE_SHARED_SB_DEC_SHIFT) +#define CS_REQ_IDLE_SHARED_SB_DEC_GET(reg_val) \ + (((reg_val) & CS_REQ_IDLE_SHARED_SB_DEC_MASK) >> 
CS_REQ_IDLE_SHARED_SB_DEC_SHIFT) +#define CS_REQ_IDLE_SHARED_SB_DEC_REQ_SET(reg_val, value) \ + (((reg_val) & ~CS_REQ_IDLE_SHARED_SB_DEC_MASK) | \ + (((value) << CS_REQ_IDLE_SHARED_SB_DEC_SHIFT) & CS_REQ_IDLE_SHARED_SB_DEC_MASK)) #define CS_REQ_TILER_OOM_SHIFT 26 #define CS_REQ_TILER_OOM_MASK (0x1 << CS_REQ_TILER_OOM_SHIFT) #define CS_REQ_TILER_OOM_GET(reg_val) (((reg_val)&CS_REQ_TILER_OOM_MASK) >> CS_REQ_TILER_OOM_SHIFT) @@ -582,6 +611,13 @@ #define CS_STATUS_WAIT_PROTM_PEND_SET(reg_val, value) \ (((reg_val) & ~CS_STATUS_WAIT_PROTM_PEND_MASK) | \ (((value) << CS_STATUS_WAIT_PROTM_PEND_SHIFT) & CS_STATUS_WAIT_PROTM_PEND_MASK)) +#define CS_STATUS_WAIT_SYNC_WAIT_SIZE_SHIFT 30 +#define CS_STATUS_WAIT_SYNC_WAIT_SIZE_MASK (0x1 << CS_STATUS_WAIT_SYNC_WAIT_SIZE_SHIFT) +#define CS_STATUS_WAIT_SYNC_WAIT_SIZE_GET(reg_val) \ + (((reg_val)&CS_STATUS_WAIT_SYNC_WAIT_SIZE_MASK) >> CS_STATUS_WAIT_SYNC_WAIT_SIZE_SHIFT) +#define CS_STATUS_WAIT_SYNC_WAIT_SIZE_SET(reg_val, value) \ + (((reg_val) & ~CS_STATUS_WAIT_SYNC_WAIT_SIZE_MASK) | \ + (((value) << CS_STATUS_WAIT_SYNC_WAIT_SIZE_SHIFT) & CS_STATUS_WAIT_SYNC_WAIT_SIZE_MASK)) #define CS_STATUS_WAIT_SYNC_WAIT_SHIFT 31 #define CS_STATUS_WAIT_SYNC_WAIT_MASK (0x1 << CS_STATUS_WAIT_SYNC_WAIT_SHIFT) #define CS_STATUS_WAIT_SYNC_WAIT_GET(reg_val) \ @@ -1590,4 +1626,43 @@ ((GLB_PRFCNT_SIZE_FIRMWARE_SIZE_SET_MOD(value) << GLB_PRFCNT_SIZE_FIRMWARE_SIZE_SHIFT) & \ GLB_PRFCNT_SIZE_FIRMWARE_SIZE_MASK)) +/* GLB_DEBUG_REQ register */ +#define GLB_DEBUG_REQ_DEBUG_RUN_SHIFT GPU_U(23) +#define GLB_DEBUG_REQ_DEBUG_RUN_MASK (GPU_U(0x1) << GLB_DEBUG_REQ_DEBUG_RUN_SHIFT) +#define GLB_DEBUG_REQ_DEBUG_RUN_GET(reg_val) \ + (((reg_val)&GLB_DEBUG_REQ_DEBUG_RUN_MASK) >> GLB_DEBUG_REQ_DEBUG_RUN_SHIFT) +#define GLB_DEBUG_REQ_DEBUG_RUN_SET(reg_val, value) \ + (((reg_val) & ~GLB_DEBUG_REQ_DEBUG_RUN_MASK) | \ + (((value) << GLB_DEBUG_REQ_DEBUG_RUN_SHIFT) & GLB_DEBUG_REQ_DEBUG_RUN_MASK)) + +#define GLB_DEBUG_REQ_RUN_MODE_SHIFT GPU_U(24) +#define 
GLB_DEBUG_REQ_RUN_MODE_MASK (GPU_U(0xFF) << GLB_DEBUG_REQ_RUN_MODE_SHIFT) +#define GLB_DEBUG_REQ_RUN_MODE_GET(reg_val) \ + (((reg_val)&GLB_DEBUG_REQ_RUN_MODE_MASK) >> GLB_DEBUG_REQ_RUN_MODE_SHIFT) +#define GLB_DEBUG_REQ_RUN_MODE_SET(reg_val, value) \ + (((reg_val) & ~GLB_DEBUG_REQ_RUN_MODE_MASK) | \ + (((value) << GLB_DEBUG_REQ_RUN_MODE_SHIFT) & GLB_DEBUG_REQ_RUN_MODE_MASK)) + +/* GLB_DEBUG_ACK register */ +#define GLB_DEBUG_ACK_DEBUG_RUN_SHIFT GPU_U(23) +#define GLB_DEBUG_ACK_DEBUG_RUN_MASK (GPU_U(0x1) << GLB_DEBUG_ACK_DEBUG_RUN_SHIFT) +#define GLB_DEBUG_ACK_DEBUG_RUN_GET(reg_val) \ + (((reg_val)&GLB_DEBUG_ACK_DEBUG_RUN_MASK) >> GLB_DEBUG_ACK_DEBUG_RUN_SHIFT) +#define GLB_DEBUG_ACK_DEBUG_RUN_SET(reg_val, value) \ + (((reg_val) & ~GLB_DEBUG_ACK_DEBUG_RUN_MASK) | \ + (((value) << GLB_DEBUG_ACK_DEBUG_RUN_SHIFT) & GLB_DEBUG_ACK_DEBUG_RUN_MASK)) + +#define GLB_DEBUG_ACK_RUN_MODE_SHIFT GPU_U(24) +#define GLB_DEBUG_ACK_RUN_MODE_MASK (GPU_U(0xFF) << GLB_DEBUG_ACK_RUN_MODE_SHIFT) +#define GLB_DEBUG_ACK_RUN_MODE_GET(reg_val) \ + (((reg_val)&GLB_DEBUG_ACK_RUN_MODE_MASK) >> GLB_DEBUG_ACK_RUN_MODE_SHIFT) +#define GLB_DEBUG_ACK_RUN_MODE_SET(reg_val, value) \ + (((reg_val) & ~GLB_DEBUG_ACK_RUN_MODE_MASK) | \ + (((value) << GLB_DEBUG_ACK_RUN_MODE_SHIFT) & GLB_DEBUG_ACK_RUN_MODE_MASK)) + +/* RUN_MODE values */ +#define GLB_DEBUG_RUN_MODE_TYPE_NOP 0x0 +#define GLB_DEBUG_RUN_MODE_TYPE_CORE_DUMP 0x1 +/* End of RUN_MODE values */ + #endif /* _KBASE_CSF_REGISTERS_H_ */ diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_scheduler.c b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_scheduler.c index b3cdef7dae52..135d3b01a2ff 100644 --- a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_scheduler.c +++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_scheduler.c @@ -32,6 +32,7 @@ #include #include #include "mali_kbase_csf_tiler_heap_reclaim.h" +#include "mali_kbase_csf_mcu_shared_reg.h" /* Value to indicate that a queue group is not groups_to_schedule list */ #define 
KBASEP_GROUP_PREPARED_SEQ_NUM_INVALID (U32_MAX) @@ -57,6 +58,9 @@ /* Time to wait for completion of PING req before considering MCU as hung */ #define FW_PING_AFTER_ERROR_TIMEOUT_MS (10) +/* Explicitly defining this blocked_reason code as SB_WAIT for clarity */ +#define CS_STATUS_BLOCKED_ON_SB_WAIT CS_STATUS_BLOCKED_REASON_REASON_WAIT + static int scheduler_group_schedule(struct kbase_queue_group *group); static void remove_group_from_idle_wait(struct kbase_queue_group *const group); static @@ -561,7 +565,7 @@ void kbase_csf_scheduler_process_gpu_idle_event(struct kbase_device *kbdev) * updated whilst gpu_idle_worker() is executing. */ scheduler->fast_gpu_idle_handling = - (kbdev->csf.gpu_idle_hysteresis_ms == 0) || + (kbdev->csf.gpu_idle_hysteresis_us == 0) || !kbase_csf_scheduler_all_csgs_idle(kbdev); /* The GPU idle worker relies on update_on_slot_queues_offsets() to have @@ -1458,6 +1462,7 @@ int kbase_csf_scheduler_queue_stop(struct kbase_queue *queue) err = sched_halt_stream(queue); unassign_user_doorbell_from_queue(kbdev, queue); + kbase_csf_mcu_shared_drop_stopped_queue(kbdev, queue); } mutex_unlock(&kbdev->csf.scheduler.lock); @@ -1575,17 +1580,15 @@ static void program_cs(struct kbase_device *kbdev, kbase_csf_firmware_cs_input(stream, CS_SIZE, queue->size); - user_input = (queue->reg->start_pfn << PAGE_SHIFT); - kbase_csf_firmware_cs_input(stream, CS_USER_INPUT_LO, - user_input & 0xFFFFFFFF); - kbase_csf_firmware_cs_input(stream, CS_USER_INPUT_HI, - user_input >> 32); + user_input = queue->user_io_gpu_va; + WARN_ONCE(!user_input && queue->enabled, "Enabled queue should have a valid gpu_va"); - user_output = ((queue->reg->start_pfn + 1) << PAGE_SHIFT); - kbase_csf_firmware_cs_input(stream, CS_USER_OUTPUT_LO, - user_output & 0xFFFFFFFF); - kbase_csf_firmware_cs_input(stream, CS_USER_OUTPUT_HI, - user_output >> 32); + kbase_csf_firmware_cs_input(stream, CS_USER_INPUT_LO, user_input & 0xFFFFFFFF); + kbase_csf_firmware_cs_input(stream, CS_USER_INPUT_HI, 
user_input >> 32); + + user_output = user_input + PAGE_SIZE; + kbase_csf_firmware_cs_input(stream, CS_USER_OUTPUT_LO, user_output & 0xFFFFFFFF); + kbase_csf_firmware_cs_input(stream, CS_USER_OUTPUT_HI, user_output >> 32); kbase_csf_firmware_cs_input(stream, CS_CONFIG, (queue->doorbell_nr << 8) | (queue->priority & 0xF)); @@ -1616,8 +1619,10 @@ static void program_cs(struct kbase_device *kbdev, * or protected mode switch. */ kbase_csf_firmware_cs_input_mask(stream, CS_REQ, - CS_REQ_IDLE_EMPTY_MASK | CS_REQ_IDLE_SYNC_WAIT_MASK, - CS_REQ_IDLE_EMPTY_MASK | CS_REQ_IDLE_SYNC_WAIT_MASK); + CS_REQ_IDLE_EMPTY_MASK | CS_REQ_IDLE_SYNC_WAIT_MASK | + CS_REQ_IDLE_SHARED_SB_DEC_MASK, + CS_REQ_IDLE_EMPTY_MASK | CS_REQ_IDLE_SYNC_WAIT_MASK | + CS_REQ_IDLE_SHARED_SB_DEC_MASK); /* Set state to START/STOP */ kbase_csf_firmware_cs_input_mask(stream, CS_REQ, @@ -1632,6 +1637,20 @@ static void program_cs(struct kbase_device *kbdev, update_hw_active(queue, true); } +static int onslot_csg_add_new_queue(struct kbase_queue *queue) +{ + struct kbase_device *kbdev = queue->kctx->kbdev; + int err; + + lockdep_assert_held(&kbdev->csf.scheduler.lock); + + err = kbase_csf_mcu_shared_add_queue(kbdev, queue); + if (!err) + program_cs(kbdev, queue, true); + + return err; +} + int kbase_csf_scheduler_queue_start(struct kbase_queue *queue) { struct kbase_queue_group *group = queue->group; @@ -1687,8 +1706,28 @@ int kbase_csf_scheduler_queue_start(struct kbase_queue *queue) * user door-bell on such a case. */ kbase_csf_ring_cs_user_doorbell(kbdev, queue); - } else - program_cs(kbdev, queue, true); + } else { + err = onslot_csg_add_new_queue(queue); + /* For an on slot CSG, the only error in adding a new + * queue to run is that the scheduler could not map + * the required userio pages due to likely some resource + * issues. In such a case, and if the group is yet + * to enter its fatal error state, we return a -EBUSY + * to the submitter for another kick. 
The queue itself + * has yet to be programmed hence needs to remain in its + * previous (disabled) state. If the error persists, + * the group will eventually report a fatal error by + * the group's error reporting mechanism, when the MCU + * shared region map retry limit of the group is + * exceeded. For such a case, the expected error value + * is -EIO. + */ + if (unlikely(err)) { + queue->enabled = cs_enabled; + mutex_unlock(&kbdev->csf.scheduler.lock); + return (err != -EIO) ? -EBUSY : err; + } + } } queue_delayed_work(system_long_wq, &kbdev->csf.scheduler.ping_work, msecs_to_jiffies(kbase_get_timeout_ms( @@ -1899,9 +1938,12 @@ static bool evaluate_sync_update(struct kbase_queue *queue) struct kbase_vmap_struct *mapping; bool updated = false; u32 *sync_ptr; + u32 sync_wait_size; + u32 sync_wait_align_mask; u32 sync_wait_cond; u32 sync_current_val; struct kbase_device *kbdev; + bool sync_wait_align_valid = false; bool sync_wait_cond_valid = false; if (WARN_ON(!queue)) @@ -1911,6 +1953,16 @@ static bool evaluate_sync_update(struct kbase_queue *queue) lockdep_assert_held(&kbdev->csf.scheduler.lock); + sync_wait_size = CS_STATUS_WAIT_SYNC_WAIT_SIZE_GET(queue->status_wait); + sync_wait_align_mask = + (sync_wait_size == 0 ? 
BASEP_EVENT32_ALIGN_BYTES : BASEP_EVENT64_ALIGN_BYTES) - 1; + sync_wait_align_valid = ((uintptr_t)queue->sync_ptr & sync_wait_align_mask) == 0; + if (!sync_wait_align_valid) { + dev_dbg(queue->kctx->kbdev->dev, "sync memory VA 0x%016llX is misaligned", + queue->sync_ptr); + goto out; + } + sync_ptr = kbase_phy_alloc_mapping_get(queue->kctx, queue->sync_ptr, &mapping); @@ -1995,7 +2047,7 @@ bool save_slot_cs(struct kbase_csf_cmd_stream_group_info const *const ginfo, KBASE_KTRACE_ADD_CSF_GRP_Q(stream->kbdev, QUEUE_SYNC_UPDATE_WAIT_STATUS, queue->group, queue, status); - if (CS_STATUS_WAIT_SYNC_WAIT_GET(status)) { + if (CS_STATUS_WAIT_SYNC_WAIT_GET(status) || CS_STATUS_WAIT_SB_MASK_GET(status)) { queue->status_wait = status; queue->sync_ptr = kbase_csf_firmware_cs_output(stream, CS_STATUS_WAIT_SYNC_POINTER_LO); @@ -2011,7 +2063,8 @@ bool save_slot_cs(struct kbase_csf_cmd_stream_group_info const *const ginfo, kbase_csf_firmware_cs_output(stream, CS_STATUS_BLOCKED_REASON)); - if (!evaluate_sync_update(queue)) { + if ((queue->blocked_reason == CS_STATUS_BLOCKED_ON_SB_WAIT) || + !evaluate_sync_update(queue)) { is_waiting = true; } else { /* Sync object already got updated & met the condition @@ -2305,7 +2358,7 @@ static void deschedule_idle_wait_group(struct kbase_csf_scheduler *scheduler, insert_group_to_idle_wait(group); } -static void update_offslot_non_idle_cnt_for_faulty_grp(struct kbase_queue_group *group) +static void update_offslot_non_idle_cnt(struct kbase_queue_group *group) { struct kbase_device *kbdev = group->kctx->kbdev; struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler; @@ -2442,9 +2495,14 @@ static void save_csg_slot(struct kbase_queue_group *group) if (!queue || !queue->enabled) continue; - if (save_slot_cs(ginfo, queue)) - sync_wait = true; - else { + if (save_slot_cs(ginfo, queue)) { + /* sync_wait is only true if the queue is blocked on + * a CQS and not a scoreboard. 
+ */ + if (queue->blocked_reason != + CS_STATUS_BLOCKED_ON_SB_WAIT) + sync_wait = true; + } else { /* Need to confirm if ringbuffer of the GPU * queue is empty or not. A race can arise * between the flush of GPU queue and suspend @@ -2558,6 +2616,11 @@ static bool cleanup_csg_slot(struct kbase_queue_group *group) KBASE_TLSTREAM_TL_KBASE_DEVICE_DEPROGRAM_CSG(kbdev, kbdev->gpu_props.props.raw_props.gpu_id, slot); + /* Notify the group is off-slot and the csg_reg might be available for + * reuse with other groups in a 'lazy unbinding' style. + */ + kbase_csf_mcu_shared_set_group_csg_reg_unused(kbdev, group); + return as_fault; } @@ -2641,8 +2704,8 @@ static void program_csg_slot(struct kbase_queue_group *group, s8 slot, u32 state; int i; unsigned long flags; - const u64 normal_suspend_buf = - group->normal_suspend_buf.reg->start_pfn << PAGE_SHIFT; + u64 normal_suspend_buf; + u64 protm_suspend_buf; struct kbase_csf_csg_slot *csg_slot = &kbdev->csf.scheduler.csg_slots[slot]; @@ -2654,6 +2717,19 @@ static void program_csg_slot(struct kbase_queue_group *group, s8 slot, WARN_ON(atomic_read(&csg_slot->state) != CSG_SLOT_READY); + if (unlikely(kbase_csf_mcu_shared_group_bind_csg_reg(kbdev, group))) { + dev_warn(kbdev->dev, + "Couldn't bind MCU shared csg_reg for group %d of context %d_%d, slot=%u", + group->handle, group->kctx->tgid, kctx->id, slot); + kbase_csf_mcu_shared_set_group_csg_reg_unused(kbdev, group); + return; + } + + /* The suspend buf has already been mapped through binding to csg_reg */ + normal_suspend_buf = group->normal_suspend_buf.gpu_va; + protm_suspend_buf = group->protected_suspend_buf.gpu_va; + WARN_ONCE(!normal_suspend_buf, "Normal suspend buffer not mapped"); + ginfo = &global_iface->groups[slot]; /* Pick an available address space for this context */ @@ -2666,6 +2742,7 @@ static void program_csg_slot(struct kbase_queue_group *group, s8 slot, if (kctx->as_nr == KBASEP_AS_NR_INVALID) { dev_dbg(kbdev->dev, "Could not get a valid AS for group %d of 
context %d_%d on slot %d\n", group->handle, kctx->tgid, kctx->id, slot); + kbase_csf_mcu_shared_set_group_csg_reg_unused(kbdev, group); return; } @@ -2716,15 +2793,15 @@ static void program_csg_slot(struct kbase_queue_group *group, s8 slot, kbase_csf_firmware_csg_input(ginfo, CSG_SUSPEND_BUF_HI, normal_suspend_buf >> 32); - if (group->protected_suspend_buf.reg) { - const u64 protm_suspend_buf = - group->protected_suspend_buf.reg->start_pfn << - PAGE_SHIFT; - kbase_csf_firmware_csg_input(ginfo, CSG_PROTM_SUSPEND_BUF_LO, - protm_suspend_buf & U32_MAX); - kbase_csf_firmware_csg_input(ginfo, CSG_PROTM_SUSPEND_BUF_HI, - protm_suspend_buf >> 32); - } + /* Note, we program the P-mode buffer pointer here, but actual runtime + * enter into pmode execution is controlled by the P-mode phy pages are + * allocated and mapped with the bound csg_reg, which has a specific flag + * for indicating this P-mode runnable condition before a group is + * granted its p-mode section entry. Without a P-mode entry, the buffer + * pointed is not going to be accessed at all. 
+ */ + kbase_csf_firmware_csg_input(ginfo, CSG_PROTM_SUSPEND_BUF_LO, protm_suspend_buf & U32_MAX); + kbase_csf_firmware_csg_input(ginfo, CSG_PROTM_SUSPEND_BUF_HI, protm_suspend_buf >> 32); if (group->dvs_buf) { kbase_csf_firmware_csg_input(ginfo, CSG_DVS_BUF_LO, @@ -2777,6 +2854,9 @@ static void program_csg_slot(struct kbase_queue_group *group, s8 slot, /* Programming a slot consumes a group from scanout */ update_offslot_non_idle_cnt_for_onslot_grp(group); + + /* Notify the group's bound csg_reg is now in active use */ + kbase_csf_mcu_shared_set_group_csg_reg_active(kbdev, group); } static void remove_scheduled_group(struct kbase_device *kbdev, @@ -2797,7 +2877,7 @@ static void remove_scheduled_group(struct kbase_device *kbdev, } static void sched_evict_group(struct kbase_queue_group *group, bool fault, - bool update_non_idle_offslot_grps_cnt) + bool update_non_idle_offslot_grps_cnt_from_run_state) { struct kbase_context *kctx = group->kctx; struct kbase_device *kbdev = kctx->kbdev; @@ -2808,7 +2888,7 @@ static void sched_evict_group(struct kbase_queue_group *group, bool fault, if (queue_group_scheduled_locked(group)) { u32 i; - if (update_non_idle_offslot_grps_cnt && + if (update_non_idle_offslot_grps_cnt_from_run_state && (group->run_state == KBASE_CSF_GROUP_SUSPENDED || group->run_state == KBASE_CSF_GROUP_RUNNABLE)) { int new_val = atomic_dec_return( @@ -2823,8 +2903,11 @@ static void sched_evict_group(struct kbase_queue_group *group, bool fault, } if (group->prepared_seq_num != - KBASEP_GROUP_PREPARED_SEQ_NUM_INVALID) + KBASEP_GROUP_PREPARED_SEQ_NUM_INVALID) { + if (!update_non_idle_offslot_grps_cnt_from_run_state) + update_offslot_non_idle_cnt(group); remove_scheduled_group(kbdev, group); + } if (group->run_state == KBASE_CSF_GROUP_SUSPENDED_ON_WAIT_SYNC) remove_group_from_idle_wait(group); @@ -2851,6 +2934,9 @@ static void sched_evict_group(struct kbase_queue_group *group, bool fault, } kbase_csf_tiler_heap_reclaim_sched_notify_grp_evict(group); + + /* Clear 
all the bound shared regions and unmap any in-place MMU maps */ + kbase_csf_mcu_shared_clear_evicted_group_csg_reg(kbdev, group); } static int term_group_sync(struct kbase_queue_group *group) @@ -3230,8 +3316,7 @@ static void program_group_on_vacant_csg_slot(struct kbase_device *kbdev, scheduler->remaining_tick_slots--; } } else { - update_offslot_non_idle_cnt_for_faulty_grp( - group); + update_offslot_non_idle_cnt(group); remove_scheduled_group(kbdev, group); } } @@ -3421,8 +3506,6 @@ static void program_suspending_csg_slots(struct kbase_device *kbdev) */ clear_bit(i, slot_mask); set_bit(i, scheduler->csgs_events_enable_mask); - update_offslot_non_idle_cnt_for_onslot_grp( - group); } suspend_wait_failed = true; @@ -3882,11 +3965,16 @@ static void scheduler_group_check_protm_enter(struct kbase_device *const kbdev, struct kbase_queue_group *const input_grp) { struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler; + struct kbase_protected_suspend_buffer *sbuf = &input_grp->protected_suspend_buf; unsigned long flags; bool protm_in_use; lockdep_assert_held(&scheduler->lock); + /* Return early if the physical pages have not been allocated yet */ + if (unlikely(!sbuf->pma)) + return; + /* This lock is taken to prevent the issuing of MMU command during the * transition to protected mode. This helps avoid the scenario where the * entry to protected mode happens with a memory region being locked and @@ -3945,6 +4033,15 @@ static void scheduler_group_check_protm_enter(struct kbase_device *const kbdev, KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_PROTM_ENTER, input_grp, 0u); +#if IS_ENABLED(CONFIG_MALI_CORESIGHT) + spin_unlock_irqrestore(&scheduler->interrupt_lock, flags); + + /* Coresight must be disabled before entering protected mode. 
*/ + kbase_debug_coresight_csf_disable_pmode_enter(kbdev); + + spin_lock_irqsave(&scheduler->interrupt_lock, flags); +#endif /* IS_ENABLED(CONFIG_MALI_CORESIGHT) */ + kbase_csf_enter_protected_mode(kbdev); /* Set the pending protm seq number to the next one */ protm_enter_set_next_pending_seq(kbdev); @@ -4057,8 +4154,7 @@ static void scheduler_apply(struct kbase_device *kbdev) if (!kctx_as_enabled(group->kctx) || group->faulted) { /* Drop the head group and continue */ - update_offslot_non_idle_cnt_for_faulty_grp( - group); + update_offslot_non_idle_cnt(group); remove_scheduled_group(kbdev, group); continue; } @@ -4337,6 +4433,8 @@ static void scheduler_update_idle_slots_status(struct kbase_device *kbdev, set_bit(i, csg_bitmap); } else { group->run_state = KBASE_CSF_GROUP_RUNNABLE; + KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSF_GROUP_RUNNABLE, group, + group->run_state); } } @@ -5170,16 +5268,12 @@ redo_local_tock: * queue jobs. */ if (protm_grp && scheduler->top_grp == protm_grp) { - int new_val; - dev_dbg(kbdev->dev, "Scheduler keep protm exec: group-%d", protm_grp->handle); - new_val = atomic_dec_return(&scheduler->non_idle_offslot_grps); - KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_NONIDLE_OFFSLOT_GRP_DEC, protm_grp, - new_val); - spin_unlock_irqrestore(&scheduler->interrupt_lock, flags); + update_offslot_non_idle_cnt_for_onslot_grp(protm_grp); + remove_scheduled_group(kbdev, protm_grp); scheduler_check_pmode_progress(kbdev); } else if (scheduler->top_grp) { if (protm_grp) @@ -5993,8 +6087,11 @@ void kbase_csf_scheduler_group_protm_enter(struct kbase_queue_group *group) mutex_lock(&scheduler->lock); - if (group->run_state == KBASE_CSF_GROUP_IDLE) + if (group->run_state == KBASE_CSF_GROUP_IDLE) { group->run_state = KBASE_CSF_GROUP_RUNNABLE; + KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSF_GROUP_RUNNABLE, group, + group->run_state); + } /* Check if the group is now eligible for execution in protected mode. 
*/ if (scheduler_get_protm_enter_async_group(kbdev, group)) scheduler_group_check_protm_enter(kbdev, group); @@ -6262,6 +6359,8 @@ int kbase_csf_scheduler_context_init(struct kbase_context *kctx) int priority; int err; + kbase_ctx_sched_init_ctx(kctx); + for (priority = 0; priority < KBASE_QUEUE_GROUP_PRIORITY_COUNT; ++priority) { INIT_LIST_HEAD(&kctx->csf.sched.runnable_groups[priority]); @@ -6278,7 +6377,8 @@ int kbase_csf_scheduler_context_init(struct kbase_context *kctx) if (!kctx->csf.sched.sync_update_wq) { dev_err(kctx->kbdev->dev, "Failed to initialize scheduler context workqueue"); - return -ENOMEM; + err = -ENOMEM; + goto alloc_wq_failed; } INIT_WORK(&kctx->csf.sched.sync_update_work, @@ -6291,10 +6391,16 @@ int kbase_csf_scheduler_context_init(struct kbase_context *kctx) if (err) { dev_err(kctx->kbdev->dev, "Failed to register a sync update callback"); - destroy_workqueue(kctx->csf.sched.sync_update_wq); + goto event_wait_add_failed; } return err; + +event_wait_add_failed: + destroy_workqueue(kctx->csf.sched.sync_update_wq); +alloc_wq_failed: + kbase_ctx_sched_remove_ctx(kctx); + return err; } void kbase_csf_scheduler_context_term(struct kbase_context *kctx) @@ -6302,6 +6408,8 @@ void kbase_csf_scheduler_context_term(struct kbase_context *kctx) kbase_csf_event_wait_remove(kctx, check_group_sync_update_cb, kctx); cancel_work_sync(&kctx->csf.sched.sync_update_work); destroy_workqueue(kctx->csf.sched.sync_update_wq); + + kbase_ctx_sched_remove_ctx(kctx); } int kbase_csf_scheduler_init(struct kbase_device *kbdev) @@ -6320,7 +6428,7 @@ int kbase_csf_scheduler_init(struct kbase_device *kbdev) return -ENOMEM; } - return 0; + return kbase_csf_mcu_shared_regs_data_init(kbdev); } int kbase_csf_scheduler_early_init(struct kbase_device *kbdev) @@ -6420,6 +6528,8 @@ void kbase_csf_scheduler_term(struct kbase_device *kbdev) } KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSF_GROUP_TERMINATED, NULL, kbase_csf_scheduler_get_nr_active_csgs(kbdev)); + /* Terminating the MCU shared 
regions, following the release of slots */ + kbase_csf_mcu_shared_regs_data_term(kbdev); } void kbase_csf_scheduler_early_term(struct kbase_device *kbdev) diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_sync_debugfs.c b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_sync_debugfs.c new file mode 100644 index 000000000000..a5e0ab5eaf17 --- /dev/null +++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_sync_debugfs.c @@ -0,0 +1,788 @@ +// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note +/* + * + * (C) COPYRIGHT 2022 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. 
+ * + */ + +#include "mali_kbase_csf_sync_debugfs.h" +#include "mali_kbase_csf_csg_debugfs.h" +#include +#include + +#if IS_ENABLED(CONFIG_SYNC_FILE) +#include "mali_kbase_sync.h" +#endif + +#if IS_ENABLED(CONFIG_DEBUG_FS) + +#define CQS_UNREADABLE_LIVE_VALUE "(unavailable)" + +/* GPU queue related values */ +#define GPU_CSF_MOVE_OPCODE ((u64)0x1) +#define GPU_CSF_MOVE32_OPCODE ((u64)0x2) +#define GPU_CSF_SYNC_ADD_OPCODE ((u64)0x25) +#define GPU_CSF_SYNC_SET_OPCODE ((u64)0x26) +#define GPU_CSF_SYNC_WAIT_OPCODE ((u64)0x27) +#define GPU_CSF_SYNC_ADD64_OPCODE ((u64)0x33) +#define GPU_CSF_SYNC_SET64_OPCODE ((u64)0x34) +#define GPU_CSF_SYNC_WAIT64_OPCODE ((u64)0x35) +#define GPU_CSF_CALL_OPCODE ((u64)0x20) + +#define MAX_NR_GPU_CALLS (5) +#define INSTR_OPCODE_MASK ((u64)0xFF << 56) +#define INSTR_OPCODE_GET(value) ((value & INSTR_OPCODE_MASK) >> 56) +#define MOVE32_IMM_MASK ((u64)0xFFFFFFFFUL) /* 32-bit immediate */ +#define MOVE_DEST_MASK ((u64)0xFF << 48) +#define MOVE_DEST_GET(value) ((value & MOVE_DEST_MASK) >> 48) +#define MOVE_IMM_MASK ((u64)0xFFFFFFFFFFFFUL) +#define SYNC_SRC0_MASK ((u64)0xFF << 40) +#define SYNC_SRC1_MASK ((u64)0xFF << 32) +#define SYNC_SRC0_GET(value) (u8)((value & SYNC_SRC0_MASK) >> 40) +#define SYNC_SRC1_GET(value) (u8)((value & SYNC_SRC1_MASK) >> 32) +#define SYNC_WAIT_CONDITION_MASK ((u64)0xF << 28) +#define SYNC_WAIT_CONDITION_GET(value) (u8)((value & SYNC_WAIT_CONDITION_MASK) >> 28) + +/* Enumeration for types of GPU queue sync events for + * the purpose of dumping them through debugfs. + */ +enum debugfs_gpu_sync_type { + DEBUGFS_GPU_SYNC_WAIT, + DEBUGFS_GPU_SYNC_SET, + DEBUGFS_GPU_SYNC_ADD, + NUM_DEBUGFS_GPU_SYNC_TYPES +}; + +/** + * kbasep_csf_debugfs_get_cqs_live_u32() - Obtain live (u32) value for a CQS object. + * + * @kctx: The context of the queue. + * @obj_addr: Pointer to the CQS live 32-bit value. + * @live_val: Pointer to the u32 that will be set to the CQS object's current, live + * value. 
+ * + * Return: 0 if successful or a negative error code on failure. + */ +static int kbasep_csf_debugfs_get_cqs_live_u32(struct kbase_context *kctx, u64 obj_addr, + u32 *live_val) +{ + struct kbase_vmap_struct *mapping; + u32 *const cpu_ptr = (u32 *)kbase_phy_alloc_mapping_get(kctx, obj_addr, &mapping); + + if (!cpu_ptr) + return -1; + + *live_val = *cpu_ptr; + kbase_phy_alloc_mapping_put(kctx, mapping); + return 0; +} + +/** + * kbasep_csf_debugfs_get_cqs_live_u64() - Obtain live (u64) value for a CQS object. + * + * @kctx: The context of the queue. + * @obj_addr: Pointer to the CQS live value (32 or 64-bit). + * @live_val: Pointer to the u64 that will be set to the CQS object's current, live + * value. + * + * Return: 0 if successful or a negative error code on failure. + */ +static int kbasep_csf_debugfs_get_cqs_live_u64(struct kbase_context *kctx, u64 obj_addr, + u64 *live_val) +{ + struct kbase_vmap_struct *mapping; + u64 *cpu_ptr = (u64 *)kbase_phy_alloc_mapping_get(kctx, obj_addr, &mapping); + + if (!cpu_ptr) + return -1; + + *live_val = *cpu_ptr; + kbase_phy_alloc_mapping_put(kctx, mapping); + return 0; +} + +/** + * kbasep_csf_sync_print_kcpu_fence_wait_or_signal() - Print details of a CSF SYNC Fence Wait + * or Fence Signal command, contained in a + * KCPU queue. + * + * @file: The seq_file for printing to. + * @cmd: The KCPU Command to be printed. + * @cmd_name: The name of the command: indicates either a fence SIGNAL or WAIT. 
+ */ +static void kbasep_csf_sync_print_kcpu_fence_wait_or_signal(struct seq_file *file, + struct kbase_kcpu_command *cmd, + const char *cmd_name) +{ +#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) + struct fence *fence = NULL; +#else + struct dma_fence *fence = NULL; +#endif /* LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0) */ + + struct kbase_sync_fence_info info; + const char *timeline_name = NULL; + bool is_signaled = false; + + fence = cmd->info.fence.fence; + if (WARN_ON(!fence)) + return; + + kbase_sync_fence_info_get(cmd->info.fence.fence, &info); + timeline_name = fence->ops->get_timeline_name(fence); + is_signaled = info.status > 0; + + seq_printf(file, "cmd:%s obj:0x%pK live_value:0x%.8x | ", cmd_name, cmd->info.fence.fence, + is_signaled); + + /* Note: fence->seqno was u32 until 5.1 kernel, then u64 */ + seq_printf(file, "timeline_name:%s timeline_context:0x%.16llx fence_seqno:0x%.16llx", + timeline_name, fence->context, (u64)fence->seqno); +} + +/** + * kbasep_csf_sync_print_kcpu_cqs_wait() - Print details of a CSF SYNC CQS Wait command, + * contained in a KCPU queue. + * + * @file: The seq_file for printing to. + * @cmd: The KCPU Command to be printed. 
+ */ +static void kbasep_csf_sync_print_kcpu_cqs_wait(struct seq_file *file, + struct kbase_kcpu_command *cmd) +{ + struct kbase_context *kctx = file->private; + size_t i; + + for (i = 0; i < cmd->info.cqs_wait.nr_objs; i++) { + struct base_cqs_wait_info *cqs_obj = &cmd->info.cqs_wait.objs[i]; + + u32 live_val; + int ret = kbasep_csf_debugfs_get_cqs_live_u32(kctx, cqs_obj->addr, &live_val); + bool live_val_valid = (ret >= 0); + + seq_printf(file, "cmd:CQS_WAIT_OPERATION obj:0x%.16llx live_value:", cqs_obj->addr); + + if (live_val_valid) + seq_printf(file, "0x%.16llx", (u64)live_val); + else + seq_puts(file, CQS_UNREADABLE_LIVE_VALUE); + + seq_printf(file, " | op:gt arg_value:0x%.8x", cqs_obj->val); + } +} + +/** + * kbasep_csf_sync_print_kcpu_cqs_set() - Print details of a CSF SYNC CQS + * Set command, contained in a KCPU queue. + * + * @file: The seq_file for printing to. + * @cmd: The KCPU Command to be printed. + */ +static void kbasep_csf_sync_print_kcpu_cqs_set(struct seq_file *file, + struct kbase_kcpu_command *cmd) +{ + struct kbase_context *kctx = file->private; + size_t i; + + for (i = 0; i < cmd->info.cqs_set.nr_objs; i++) { + struct base_cqs_set *cqs_obj = &cmd->info.cqs_set.objs[i]; + + u32 live_val; + int ret = kbasep_csf_debugfs_get_cqs_live_u32(kctx, cqs_obj->addr, &live_val); + bool live_val_valid = (ret >= 0); + + seq_printf(file, "cmd:CQS_SET_OPERATION obj:0x%.16llx live_value:", cqs_obj->addr); + + if (live_val_valid) + seq_printf(file, "0x%.16llx", (u64)live_val); + else + seq_puts(file, CQS_UNREADABLE_LIVE_VALUE); + + seq_printf(file, " | op:add arg_value:0x%.8x", 1); + } +} + +/** + * kbasep_csf_sync_get_wait_op_name() - Print the name of a CQS Wait Operation. + * + * @op: The numerical value of operation. + * + * Return: const static pointer to the command name, or '??' if unknown. 
+ */ +static const char *kbasep_csf_sync_get_wait_op_name(basep_cqs_wait_operation_op op) +{ + const char *string; + + switch (op) { + case BASEP_CQS_WAIT_OPERATION_LE: + string = "le"; + break; + case BASEP_CQS_WAIT_OPERATION_GT: + string = "gt"; + break; + default: + string = "??"; + break; + } + return string; +} + +/** + * kbasep_csf_sync_get_set_op_name() - Print the name of a CQS Set Operation. + * + * @op: The numerical value of operation. + * + * Return: const static pointer to the command name, or '??' if unknown. + */ +static const char *kbasep_csf_sync_get_set_op_name(basep_cqs_set_operation_op op) +{ + const char *string; + + switch (op) { + case BASEP_CQS_SET_OPERATION_ADD: + string = "add"; + break; + case BASEP_CQS_SET_OPERATION_SET: + string = "set"; + break; + default: + string = "???"; + break; + } + return string; +} + +/** + * kbasep_csf_sync_print_kcpu_cqs_wait_op() - Print details of a CSF SYNC CQS + * Wait Operation command, contained + * in a KCPU queue. + * + * @file: The seq_file for printing to. + * @cmd: The KCPU Command to be printed. 
+ */ +static void kbasep_csf_sync_print_kcpu_cqs_wait_op(struct seq_file *file, + struct kbase_kcpu_command *cmd) +{ + size_t i; + struct kbase_context *kctx = file->private; + + for (i = 0; i < cmd->info.cqs_wait_operation.nr_objs; i++) { + struct base_cqs_wait_operation_info *wait_op = + &cmd->info.cqs_wait_operation.objs[i]; + const char *op_name = kbasep_csf_sync_get_wait_op_name(wait_op->operation); + + u64 live_val; + int ret = kbasep_csf_debugfs_get_cqs_live_u64(kctx, wait_op->addr, &live_val); + + bool live_val_valid = (ret >= 0); + + seq_printf(file, "cmd:CQS_WAIT_OPERATION obj:0x%.16llx live_value:", wait_op->addr); + + if (live_val_valid) + seq_printf(file, "0x%.16llx", live_val); + else + seq_puts(file, CQS_UNREADABLE_LIVE_VALUE); + + seq_printf(file, " | op:%s arg_value:0x%.16llx", op_name, wait_op->val); + } +} + +/** + * kbasep_csf_sync_print_kcpu_cqs_set_op() - Print details of a CSF SYNC CQS + * Set Operation command, contained + * in a KCPU queue. + * + * @file: The seq_file for printing to. + * @cmd: The KCPU Command to be printed. 
+ */ +static void kbasep_csf_sync_print_kcpu_cqs_set_op(struct seq_file *file, + struct kbase_kcpu_command *cmd) +{ + size_t i; + struct kbase_context *kctx = file->private; + + for (i = 0; i < cmd->info.cqs_set_operation.nr_objs; i++) { + struct base_cqs_set_operation_info *set_op = &cmd->info.cqs_set_operation.objs[i]; + const char *op_name = kbasep_csf_sync_get_set_op_name( + (basep_cqs_set_operation_op)set_op->operation); + + u64 live_val; + int ret = kbasep_csf_debugfs_get_cqs_live_u64(kctx, set_op->addr, &live_val); + + bool live_val_valid = (ret >= 0); + + seq_printf(file, "cmd:CQS_SET_OPERATION obj:0x%.16llx live_value:", set_op->addr); + + if (live_val_valid) + seq_printf(file, "0x%.16llx", live_val); + else + seq_puts(file, CQS_UNREADABLE_LIVE_VALUE); + + seq_printf(file, " | op:%s arg_value:0x%.16llx", op_name, set_op->val); + } +} + +/** + * kbasep_csf_sync_kcpu_debugfs_print_queue() - Print debug data for a KCPU queue + * + * @file: The seq_file to print to. + * @queue: Pointer to the KCPU queue. + */ +static void kbasep_csf_sync_kcpu_debugfs_print_queue(struct seq_file *file, + struct kbase_kcpu_command_queue *queue) +{ + char started_or_pending; + struct kbase_kcpu_command *cmd; + struct kbase_context *kctx = file->private; + size_t i; + + if (WARN_ON(!queue)) + return; + + lockdep_assert_held(&kctx->csf.kcpu_queues.lock); + mutex_lock(&queue->lock); + + for (i = 0; i != queue->num_pending_cmds; ++i) { + started_or_pending = ((i == 0) && queue->command_started) ? 
'S' : 'P'; + seq_printf(file, "queue:KCPU-%u-%u exec:%c ", kctx->id, queue->id, + started_or_pending); + + cmd = &queue->commands[queue->start_offset + i]; + switch (cmd->type) { +#if IS_ENABLED(CONFIG_SYNC_FILE) + case BASE_KCPU_COMMAND_TYPE_FENCE_SIGNAL: + kbasep_csf_sync_print_kcpu_fence_wait_or_signal(file, cmd, "FENCE_SIGNAL"); + break; + case BASE_KCPU_COMMAND_TYPE_FENCE_WAIT: + kbasep_csf_sync_print_kcpu_fence_wait_or_signal(file, cmd, "FENCE_WAIT"); + break; +#endif + case BASE_KCPU_COMMAND_TYPE_CQS_WAIT: + kbasep_csf_sync_print_kcpu_cqs_wait(file, cmd); + break; + case BASE_KCPU_COMMAND_TYPE_CQS_SET: + kbasep_csf_sync_print_kcpu_cqs_set(file, cmd); + break; + case BASE_KCPU_COMMAND_TYPE_CQS_WAIT_OPERATION: + kbasep_csf_sync_print_kcpu_cqs_wait_op(file, cmd); + break; + case BASE_KCPU_COMMAND_TYPE_CQS_SET_OPERATION: + kbasep_csf_sync_print_kcpu_cqs_set_op(file, cmd); + break; + default: + seq_puts(file, ", U, Unknown blocking command"); + break; + } + + seq_puts(file, "\n"); + } + + mutex_unlock(&queue->lock); +} + +/** + * kbasep_csf_sync_kcpu_debugfs_show() - Print CSF KCPU queue sync info + * + * @file: The seq_file for printing to. + * + * Return: Negative error code or 0 on success. + */ +static int kbasep_csf_sync_kcpu_debugfs_show(struct seq_file *file) +{ + struct kbase_context *kctx = file->private; + unsigned long queue_idx; + + mutex_lock(&kctx->csf.kcpu_queues.lock); + seq_printf(file, "KCPU queues for ctx %u:\n", kctx->id); + + queue_idx = find_first_bit(kctx->csf.kcpu_queues.in_use, KBASEP_MAX_KCPU_QUEUES); + + while (queue_idx < KBASEP_MAX_KCPU_QUEUES) { + kbasep_csf_sync_kcpu_debugfs_print_queue(file, + kctx->csf.kcpu_queues.array[queue_idx]); + + queue_idx = find_next_bit(kctx->csf.kcpu_queues.in_use, KBASEP_MAX_KCPU_QUEUES, + queue_idx + 1); + } + + mutex_unlock(&kctx->csf.kcpu_queues.lock); + return 0; +} + +/** + * kbasep_csf_get_move_immediate_value() - Get the immediate values for sync operations + * from a MOVE instruction. 
+ * + * @move_cmd: Raw MOVE instruction. + * @sync_addr_reg: Register identifier from SYNC_* instruction. + * @compare_val_reg: Register identifier from SYNC_* instruction. + * @sync_val: Pointer to store CQS object address for sync operation. + * @compare_val: Pointer to store compare value for sync operation. + * + * Return: True if value is obtained by checking for correct register identifier, + * or false otherwise. + */ +static bool kbasep_csf_get_move_immediate_value(u64 move_cmd, u64 sync_addr_reg, + u64 compare_val_reg, u64 *sync_val, + u64 *compare_val) +{ + u64 imm_mask; + + /* Verify MOVE instruction and get immediate mask */ + if (INSTR_OPCODE_GET(move_cmd) == GPU_CSF_MOVE32_OPCODE) + imm_mask = MOVE32_IMM_MASK; + else if (INSTR_OPCODE_GET(move_cmd) == GPU_CSF_MOVE_OPCODE) + imm_mask = MOVE_IMM_MASK; + else + /* Error return */ + return false; + + /* Verify value from MOVE instruction and assign to variable */ + if (sync_addr_reg == MOVE_DEST_GET(move_cmd)) + *sync_val = move_cmd & imm_mask; + else if (compare_val_reg == MOVE_DEST_GET(move_cmd)) + *compare_val = move_cmd & imm_mask; + else + /* Error return */ + return false; + + return true; +} + +/** kbasep_csf_read_ringbuffer_value() - Reads a u64 from the ringbuffer at a provided + * offset. + * + * @queue: Pointer to the queue. + * @ringbuff_offset: Ringbuffer offset. + * + * Return: the u64 in the ringbuffer at the desired offset. + */ +static u64 kbasep_csf_read_ringbuffer_value(struct kbase_queue *queue, u32 ringbuff_offset) +{ + u64 page_off = ringbuff_offset >> PAGE_SHIFT; + u64 offset_within_page = ringbuff_offset & ~PAGE_MASK; + struct page *page = as_page(queue->queue_reg->gpu_alloc->pages[page_off]); + u64 *ringbuffer = kmap_atomic(page); + u64 value = ringbuffer[offset_within_page / sizeof(u64)]; + + kunmap_atomic(ringbuffer); + return value; +} + +/** + * kbasep_csf_print_gpu_sync_op() - Print sync operation info for given sync command. 
+ * + * @file: Pointer to debugfs seq_file file struct for writing output. + * @kctx: Pointer to kbase context. + * @queue: Pointer to the GPU command queue. + * @ringbuff_offset: Offset to index the ring buffer with, for the given sync command. + * (Useful for finding preceding MOVE commands) + * @sync_cmd: Entire u64 of the sync command, which has both sync address and + * comparison-value encoded in it. + * @type: Type of GPU sync command (e.g. SYNC_SET, SYNC_ADD, SYNC_WAIT). + * @is_64bit: Bool to indicate if operation is 64 bit (true) or 32 bit (false). + * @follows_wait: Bool to indicate if the operation follows at least one wait + * operation. Used to determine whether it's pending or started. + */ +static void kbasep_csf_print_gpu_sync_op(struct seq_file *file, struct kbase_context *kctx, + struct kbase_queue *queue, u32 ringbuff_offset, + u64 sync_cmd, enum debugfs_gpu_sync_type type, + bool is_64bit, bool follows_wait) +{ + u64 sync_addr = 0, compare_val = 0, live_val = 0; + u64 move_cmd; + u8 sync_addr_reg, compare_val_reg, wait_condition = 0; + int err; + + static const char *const gpu_sync_type_name[] = { "SYNC_WAIT", "SYNC_SET", "SYNC_ADD" }; + static const char *const gpu_sync_type_op[] = { + "wait", /* This should never be printed, only included to simplify indexing */ + "set", "add" + }; + + if (type >= NUM_DEBUGFS_GPU_SYNC_TYPES) { + dev_warn(kctx->kbdev->dev, "Expected GPU queue sync type is unknown!"); + return; + } + + /* We expect there to be at least 2 preceding MOVE instructions, and + * Base will always arrange for the 2 MOVE + SYNC instructions to be + * contiguously located, and is therefore never expected to be wrapped + * around the ringbuffer boundary. + */ + if (unlikely(ringbuff_offset < (2 * sizeof(u64)))) { + dev_warn(kctx->kbdev->dev, + "Unexpected wraparound detected between %s & MOVE instruction", + gpu_sync_type_name[type]); + return; + } + + /* 1. 
Get Register identifiers from SYNC_* instruction */ + sync_addr_reg = SYNC_SRC0_GET(sync_cmd); + compare_val_reg = SYNC_SRC1_GET(sync_cmd); + + /* 2. Get values from first MOVE command */ + ringbuff_offset -= sizeof(u64); + move_cmd = kbasep_csf_read_ringbuffer_value(queue, ringbuff_offset); + if (!kbasep_csf_get_move_immediate_value(move_cmd, sync_addr_reg, compare_val_reg, + &sync_addr, &compare_val)) + return; + + /* 3. Get values from next MOVE command */ + ringbuff_offset -= sizeof(u64); + move_cmd = kbasep_csf_read_ringbuffer_value(queue, ringbuff_offset); + if (!kbasep_csf_get_move_immediate_value(move_cmd, sync_addr_reg, compare_val_reg, + &sync_addr, &compare_val)) + return; + + /* 4. Get CQS object value */ + if (is_64bit) + err = kbasep_csf_debugfs_get_cqs_live_u64(kctx, sync_addr, &live_val); + else + err = kbasep_csf_debugfs_get_cqs_live_u32(kctx, sync_addr, (u32 *)(&live_val)); + + if (err) + return; + + /* 5. Print info */ + seq_printf(file, "queue:GPU-%u-%u-%u exec:%c cmd:%s ", kctx->id, queue->group->handle, + queue->csi_index, queue->enabled && !follows_wait ? 'S' : 'P', + gpu_sync_type_name[type]); + + if (queue->group->csg_nr == KBASEP_CSG_NR_INVALID) + seq_puts(file, "slot:-"); + else + seq_printf(file, "slot:%d", (int)queue->group->csg_nr); + + seq_printf(file, " obj:0x%.16llx live_value:0x%.16llx | ", sync_addr, live_val); + + if (type == DEBUGFS_GPU_SYNC_WAIT) { + wait_condition = SYNC_WAIT_CONDITION_GET(sync_cmd); + seq_printf(file, "op:%s ", kbasep_csf_sync_get_wait_op_name(wait_condition)); + } else + seq_printf(file, "op:%s ", gpu_sync_type_op[type]); + + seq_printf(file, "arg_value:0x%.16llx\n", compare_val); +} + +/** + * kbasep_csf_dump_active_queue_sync_info() - Print GPU command queue sync information. + * + * @file: seq_file for printing to. + * @queue: Address of a GPU command queue to examine. 
+ * + * This function will iterate through each command in the ring buffer of the given GPU queue from + * CS_EXTRACT, and if is a SYNC_* instruction it will attempt to decode the sync operation and + * print relevant information to the debugfs file. + * This function will stop iterating once the CS_INSERT address is reached by the cursor (i.e. + * when there are no more commands to view) or a number of consumed GPU CALL commands have + * been observed. + */ +static void kbasep_csf_dump_active_queue_sync_info(struct seq_file *file, struct kbase_queue *queue) +{ + struct kbase_context *kctx; + u32 *addr; + u64 cs_extract, cs_insert, instr, cursor; + bool follows_wait = false; + int nr_calls = 0; + + if (!queue) + return; + + kctx = queue->kctx; + + addr = (u32 *)queue->user_io_addr; + cs_insert = addr[CS_INSERT_LO / 4] | ((u64)addr[CS_INSERT_HI / 4] << 32); + + addr = (u32 *)(queue->user_io_addr + PAGE_SIZE); + cs_extract = addr[CS_EXTRACT_LO / 4] | ((u64)addr[CS_EXTRACT_HI / 4] << 32); + + cursor = cs_extract; + + if (!is_power_of_2(queue->size)) { + dev_warn(kctx->kbdev->dev, "GPU queue %u size of %u not a power of 2", + queue->csi_index, queue->size); + return; + } + + while ((cursor < cs_insert) && (nr_calls < MAX_NR_GPU_CALLS)) { + bool instr_is_64_bit = false; + /* Calculate offset into ringbuffer from the absolute cursor, + * by finding the remainder of the cursor divided by the + * ringbuffer size. The ringbuffer size is guaranteed to be + * a power of 2, so the remainder can be calculated without an + * explicit modulo. queue->size - 1 is the ringbuffer mask. 
+ */ + u32 cursor_ringbuff_offset = (u32)(cursor & (queue->size - 1)); + + /* Find instruction that cursor is currently on */ + instr = kbasep_csf_read_ringbuffer_value(queue, cursor_ringbuff_offset); + + switch (INSTR_OPCODE_GET(instr)) { + case GPU_CSF_SYNC_ADD64_OPCODE: + case GPU_CSF_SYNC_SET64_OPCODE: + case GPU_CSF_SYNC_WAIT64_OPCODE: + instr_is_64_bit = true; + default: + break; + } + + switch (INSTR_OPCODE_GET(instr)) { + case GPU_CSF_SYNC_ADD_OPCODE: + case GPU_CSF_SYNC_ADD64_OPCODE: + kbasep_csf_print_gpu_sync_op(file, kctx, queue, cursor_ringbuff_offset, + instr, DEBUGFS_GPU_SYNC_ADD, instr_is_64_bit, + follows_wait); + break; + case GPU_CSF_SYNC_SET_OPCODE: + case GPU_CSF_SYNC_SET64_OPCODE: + kbasep_csf_print_gpu_sync_op(file, kctx, queue, cursor_ringbuff_offset, + instr, DEBUGFS_GPU_SYNC_SET, instr_is_64_bit, + follows_wait); + break; + case GPU_CSF_SYNC_WAIT_OPCODE: + case GPU_CSF_SYNC_WAIT64_OPCODE: + kbasep_csf_print_gpu_sync_op(file, kctx, queue, cursor_ringbuff_offset, + instr, DEBUGFS_GPU_SYNC_WAIT, instr_is_64_bit, + follows_wait); + follows_wait = true; /* Future commands will follow at least one wait */ + break; + case GPU_CSF_CALL_OPCODE: + nr_calls++; + /* Fallthrough */ + default: + /* Unrecognized command, skip past it */ + break; + } + + cursor += sizeof(u64); + } +} + +/** + * kbasep_csf_dump_active_group_sync_state() - Prints SYNC commands in all GPU queues of + * the provided queue group. + * + * @file: seq_file for printing to. + * @group: Address of a GPU command group to iterate through. + * + * This function will iterate through each queue in the provided GPU queue group and + * print its SYNC related commands. 
+ */ +static void kbasep_csf_dump_active_group_sync_state(struct seq_file *file, + struct kbase_queue_group *const group) +{ + struct kbase_context *kctx = file->private; + unsigned int i; + + seq_printf(file, "GPU queues for group %u (slot %d) of ctx %d_%d\n", group->handle, + group->csg_nr, kctx->tgid, kctx->id); + + for (i = 0; i < MAX_SUPPORTED_STREAMS_PER_GROUP; i++) + kbasep_csf_dump_active_queue_sync_info(file, group->bound_queues[i]); +} + +/** + * kbasep_csf_sync_gpu_debugfs_show() - Print CSF GPU queue sync info + * + * @file: The seq_file for printing to. + * + * Return: Negative error code or 0 on success. + */ +static int kbasep_csf_sync_gpu_debugfs_show(struct seq_file *file) +{ + u32 gr; + struct kbase_context *kctx = file->private; + struct kbase_device *kbdev; + + if (WARN_ON(!kctx)) + return -EINVAL; + + kbdev = kctx->kbdev; + kbase_csf_scheduler_lock(kbdev); + kbase_csf_debugfs_update_active_groups_status(kbdev); + + for (gr = 0; gr < kbdev->csf.global_iface.group_num; gr++) { + struct kbase_queue_group *const group = + kbdev->csf.scheduler.csg_slots[gr].resident_group; + if (!group || group->kctx != kctx) + continue; + kbasep_csf_dump_active_group_sync_state(file, group); + } + + kbase_csf_scheduler_unlock(kbdev); + return 0; +} + +/** + * kbasep_csf_sync_debugfs_show() - Print CSF queue sync information + * + * @file: The seq_file for printing to. + * @data: The debugfs dentry private data, a pointer to kbase_context. + * + * Return: Negative error code or 0 on success. 
+ */ +static int kbasep_csf_sync_debugfs_show(struct seq_file *file, void *data) +{ + seq_printf(file, "MALI_CSF_SYNC_DEBUGFS_VERSION: v%u\n", MALI_CSF_SYNC_DEBUGFS_VERSION); + + kbasep_csf_sync_kcpu_debugfs_show(file); + kbasep_csf_sync_gpu_debugfs_show(file); + return 0; +} + +static int kbasep_csf_sync_debugfs_open(struct inode *in, struct file *file) +{ + return single_open(file, kbasep_csf_sync_debugfs_show, in->i_private); +} + +static const struct file_operations kbasep_csf_sync_debugfs_fops = { + .open = kbasep_csf_sync_debugfs_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +/** + * kbase_csf_sync_debugfs_init() - Initialise debugfs file. + * + * @kctx: Kernel context pointer. + */ +void kbase_csf_sync_debugfs_init(struct kbase_context *kctx) +{ + struct dentry *file; + const mode_t mode = 0444; + + if (WARN_ON(!kctx || IS_ERR_OR_NULL(kctx->kctx_dentry))) + return; + + file = debugfs_create_file("csf_sync", mode, kctx->kctx_dentry, kctx, + &kbasep_csf_sync_debugfs_fops); + + if (IS_ERR_OR_NULL(file)) + dev_warn(kctx->kbdev->dev, "Unable to create CSF Sync debugfs entry"); +} + +#else +/* + * Stub functions for when debugfs is disabled + */ +void kbase_csf_sync_debugfs_init(struct kbase_context *kctx) +{ +} + +#endif /* CONFIG_DEBUG_FS */ diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_sync_debugfs.h b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_sync_debugfs.h new file mode 100644 index 000000000000..177e15d85341 --- /dev/null +++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_sync_debugfs.h @@ -0,0 +1,37 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * + * (C) COPYRIGHT 2022 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU license. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + */ + +#ifndef _KBASE_CSF_SYNC_DEBUGFS_H_ +#define _KBASE_CSF_SYNC_DEBUGFS_H_ + +/* Forward declaration */ +struct kbase_context; + +#define MALI_CSF_SYNC_DEBUGFS_VERSION 0 + +/** + * kbase_csf_sync_debugfs_init() - Create a debugfs entry for CSF queue sync info + * + * @kctx: The kbase_context for which to create the debugfs entry + */ +void kbase_csf_sync_debugfs_init(struct kbase_context *kctx); + +#endif /* _KBASE_CSF_SYNC_DEBUGFS_H_ */ diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_tiler_heap.c b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_tiler_heap.c index 909362da0047..14d80970ff70 100644 --- a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_tiler_heap.c +++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_tiler_heap.c @@ -101,7 +101,7 @@ static struct kbase_csf_tiler_heap_chunk *get_last_chunk( * @kctx: kbase context the chunk belongs to. * @chunk: The chunk whose external mappings are going to be removed. * - * This function marks the region as DONT NEED. Along with KBASE_REG_NO_USER_FREE, this indicates + * This function marks the region as DONT NEED. Along with NO_USER_FREE, this indicates * that the VA region is owned by the tiler heap and could potentially be shrunk at any time. Other * parts of kbase outside of tiler heap management should not take references on its physical * pages, and should not modify them. 
@@ -227,12 +227,14 @@ static void remove_unlinked_chunk(struct kbase_context *kctx, kbase_gpu_vm_lock(kctx); kbase_vunmap(kctx, &chunk->map); /* KBASE_REG_DONT_NEED regions will be confused with ephemeral regions (inc freed JIT - * regions), and so we must clear that flag too before freeing + * regions), and so we must clear that flag too before freeing. + * For "no user free", we check that the refcount is 1 as it is a shrinkable region; + * no other code part within kbase can take a reference to it. */ + WARN_ON(chunk->region->no_user_free_refcnt > 1); + kbase_va_region_no_user_free_put(kctx, chunk->region); #if !defined(CONFIG_MALI_VECTOR_DUMP) - chunk->region->flags &= ~(KBASE_REG_NO_USER_FREE | KBASE_REG_DONT_NEED); -#else - chunk->region->flags &= ~KBASE_REG_NO_USER_FREE; + chunk->region->flags &= ~KBASE_REG_DONT_NEED; #endif kbase_mem_free_region(kctx, chunk->region); kbase_gpu_vm_unlock(kctx); @@ -297,7 +299,7 @@ static struct kbase_csf_tiler_heap_chunk *alloc_new_chunk(struct kbase_context * kbase_gpu_vm_lock(kctx); - /* Some checks done here as KBASE_REG_NO_USER_FREE still allows such things to be made + /* Some checks done here as NO_USER_FREE still allows such things to be made * whilst we had dropped the region lock */ if (unlikely(atomic_read(&chunk->region->gpu_alloc->kernel_mappings) > 0)) { @@ -305,32 +307,45 @@ static struct kbase_csf_tiler_heap_chunk *alloc_new_chunk(struct kbase_context * goto unroll_region; } + /* There is a race condition with regard to KBASE_REG_DONT_NEED, where another + * thread can have the "no user free" refcount increased between kbase_mem_alloc + * and kbase_gpu_vm_lock (above) and before KBASE_REG_DONT_NEED is set by + * remove_external_chunk_mappings (below). + * + * It should be fine and not a security risk if we let the region leak till + * region tracker termination in such a case. 
+ */ + if (unlikely(chunk->region->no_user_free_refcnt > 1)) { + dev_err(kctx->kbdev->dev, "Chunk region has no_user_free_refcnt > 1!\n"); + goto unroll_region; + } + /* Whilst we can be sure of a number of other restrictions due to BASEP_MEM_NO_USER_FREE * being requested, it's useful to document in code what those restrictions are, and ensure * they remain in place in future. */ if (WARN(!chunk->region->gpu_alloc, - "KBASE_REG_NO_USER_FREE chunks should not have had their alloc freed")) { + "NO_USER_FREE chunks should not have had their alloc freed")) { goto unroll_region; } if (WARN(chunk->region->gpu_alloc->type != KBASE_MEM_TYPE_NATIVE, - "KBASE_REG_NO_USER_FREE chunks should not have been freed and then reallocated as imported/non-native regions")) { + "NO_USER_FREE chunks should not have been freed and then reallocated as imported/non-native regions")) { goto unroll_region; } if (WARN((chunk->region->flags & KBASE_REG_ACTIVE_JIT_ALLOC), - "KBASE_REG_NO_USER_FREE chunks should not have been freed and then reallocated as JIT regions")) { + "NO_USER_FREE chunks should not have been freed and then reallocated as JIT regions")) { goto unroll_region; } if (WARN((chunk->region->flags & KBASE_REG_DONT_NEED), - "KBASE_REG_NO_USER_FREE chunks should not have been made ephemeral")) { + "NO_USER_FREE chunks should not have been made ephemeral")) { goto unroll_region; } if (WARN(atomic_read(&chunk->region->cpu_alloc->gpu_mappings) > 1, - "KBASE_REG_NO_USER_FREE chunks should not have been aliased")) { + "NO_USER_FREE chunks should not have been aliased")) { goto unroll_region; } @@ -344,16 +359,21 @@ static struct kbase_csf_tiler_heap_chunk *alloc_new_chunk(struct kbase_context * remove_external_chunk_mappings(kctx, chunk); kbase_gpu_vm_unlock(kctx); + /* If page migration is enabled, we don't want to migrate tiler heap pages. + * This does not change if the constituent pages are already marked as isolated. 
+ */ + if (kbase_page_migration_enabled) + kbase_set_phy_alloc_page_status(chunk->region->gpu_alloc, NOT_MOVABLE); + return chunk; unroll_region: /* KBASE_REG_DONT_NEED regions will be confused with ephemeral regions (inc freed JIT * regions), and so we must clear that flag too before freeing. */ + kbase_va_region_no_user_free_put(kctx, chunk->region); #if !defined(CONFIG_MALI_VECTOR_DUMP) - chunk->region->flags &= ~(KBASE_REG_NO_USER_FREE | KBASE_REG_DONT_NEED); -#else - chunk->region->flags &= ~KBASE_REG_NO_USER_FREE; + chunk->region->flags &= ~KBASE_REG_DONT_NEED; #endif kbase_mem_free_region(kctx, chunk->region); kbase_gpu_vm_unlock(kctx); @@ -511,7 +531,7 @@ static void delete_heap(struct kbase_csf_tiler_heap *heap) if (heap->buf_desc_reg) { kbase_vunmap(kctx, &heap->buf_desc_map); kbase_gpu_vm_lock(kctx); - heap->buf_desc_reg->flags &= ~KBASE_REG_NO_USER_FREE; + kbase_va_region_no_user_free_put(kctx, heap->buf_desc_reg); kbase_gpu_vm_unlock(kctx); } @@ -629,8 +649,8 @@ static bool kbasep_is_buffer_descriptor_region_suitable(struct kbase_context *co return false; } - if (!(reg->flags & KBASE_REG_CPU_RD) || (reg->flags & KBASE_REG_DONT_NEED) || - (reg->flags & KBASE_REG_PF_GROW) || (reg->flags & KBASE_REG_ACTIVE_JIT_ALLOC)) { + if (!(reg->flags & KBASE_REG_CPU_RD) || kbase_is_region_shrinkable(reg) || + (reg->flags & KBASE_REG_PF_GROW)) { dev_err(kctx->kbdev->dev, "Region has invalid flags: 0x%lX!\n", reg->flags); return false; } @@ -719,14 +739,17 @@ int kbase_csf_tiler_heap_init(struct kbase_context *const kctx, u32 const chunk_ /* If we don't prevent userspace from unmapping this, we may run into * use-after-free, as we don't check for the existence of the region throughout. 
*/ - buf_desc_reg->flags |= KBASE_REG_NO_USER_FREE; heap->buf_desc_va = buf_desc_va; - heap->buf_desc_reg = buf_desc_reg; + heap->buf_desc_reg = kbase_va_region_no_user_free_get(kctx, buf_desc_reg); vmap_ptr = kbase_vmap_reg(kctx, buf_desc_reg, buf_desc_va, TILER_BUF_DESC_SIZE, KBASE_REG_CPU_RD, &heap->buf_desc_map, KBASE_VMAP_FLAG_PERMANENT_MAP_ACCOUNTING); + + if (kbase_page_migration_enabled) + kbase_set_phy_alloc_page_status(buf_desc_reg->gpu_alloc, NOT_MOVABLE); + kbase_gpu_vm_unlock(kctx); if (unlikely(!vmap_ptr)) { @@ -811,7 +834,7 @@ heap_context_alloc_failed: buf_desc_vmap_failed: if (heap->buf_desc_reg) { kbase_gpu_vm_lock(kctx); - heap->buf_desc_reg->flags &= ~KBASE_REG_NO_USER_FREE; + kbase_va_region_no_user_free_put(kctx, heap->buf_desc_reg); kbase_gpu_vm_unlock(kctx); } buf_desc_not_suitable: @@ -866,6 +889,25 @@ int kbase_csf_tiler_heap_term(struct kbase_context *const kctx, return err; } +/** + * validate_allocation_request - Check whether the chunk allocation request + * received on tiler OOM should be handled at + * current time. 
+ * + * @heap: The tiler heap the OOM is associated with + * @nr_in_flight: Number of fragment jobs in flight + * @pending_frag_count: Number of pending fragment jobs + * + * Context: must hold the tiler heap lock to guarantee its lifetime + * + * Return: + * * 0 - allowed to allocate an additional chunk + * * -EINVAL - invalid + * * -EBUSY - there are fragment jobs still in flight, which may free chunks + * after completing + * * -ENOMEM - the targeted number of in-flight chunks has been reached and + * no new ones will be allocated + */ static int validate_allocation_request(struct kbase_csf_tiler_heap *heap, u32 nr_in_flight, u32 pending_frag_count) { diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_tiler_heap_reclaim.c b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_tiler_heap_reclaim.c index bcab31d27945..069e827d16ff 100644 --- a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_tiler_heap_reclaim.c +++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_tiler_heap_reclaim.c @@ -346,7 +346,11 @@ void kbase_csf_tiler_heap_reclaim_mgr_init(struct kbase_device *kbdev) reclaim->batch = HEAP_SHRINKER_BATCH; #if !defined(CONFIG_MALI_VECTOR_DUMP) +#if KERNEL_VERSION(6, 0, 0) > LINUX_VERSION_CODE register_shrinker(reclaim); +#else + register_shrinker(reclaim, "mali-csf-tiler-heap"); +#endif #endif } diff --git a/drivers/gpu/arm/bifrost/debug/Kbuild b/drivers/gpu/arm/bifrost/debug/Kbuild index 6e1f0f75c43e..ebf3ddb763a2 100644 --- a/drivers/gpu/arm/bifrost/debug/Kbuild +++ b/drivers/gpu/arm/bifrost/debug/Kbuild @@ -1,6 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note # -# (C) COPYRIGHT 2021 ARM Limited. All rights reserved. +# (C) COPYRIGHT 2021-2022 ARM Limited. All rights reserved. 
# # This program is free software and is provided to you under the terms of the # GNU General Public License version 2 as published by the Free Software @@ -22,6 +22,7 @@ bifrost_kbase-y += debug/mali_kbase_debug_ktrace.o ifeq ($(CONFIG_MALI_CSF_SUPPORT),y) bifrost_kbase-y += debug/backend/mali_kbase_debug_ktrace_csf.o + bifrost_kbase-$(CONFIG_MALI_CORESIGHT) += debug/backend/mali_kbase_debug_coresight_csf.o else bifrost_kbase-y += debug/backend/mali_kbase_debug_ktrace_jm.o endif diff --git a/drivers/gpu/arm/bifrost/debug/backend/mali_kbase_debug_coresight_csf.c b/drivers/gpu/arm/bifrost/debug/backend/mali_kbase_debug_coresight_csf.c new file mode 100644 index 000000000000..ff5f947e2da5 --- /dev/null +++ b/drivers/gpu/arm/bifrost/debug/backend/mali_kbase_debug_coresight_csf.c @@ -0,0 +1,851 @@ +// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note +/* + * + * (C) COPYRIGHT 2022 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. 
+ * + */ + +#include +#include +#include +#include +#include +#include +#include + +static const char *coresight_state_to_string(enum kbase_debug_coresight_csf_state state) +{ + switch (state) { + case KBASE_DEBUG_CORESIGHT_CSF_DISABLED: + return "DISABLED"; + case KBASE_DEBUG_CORESIGHT_CSF_ENABLED: + return "ENABLED"; + default: + break; + } + + return "UNKNOWN"; +} + +static bool validate_reg_addr(struct kbase_debug_coresight_csf_client *client, + struct kbase_device *kbdev, u32 reg_addr, u8 op_type) +{ + int i; + + if (reg_addr & 0x3) { + dev_err(kbdev->dev, "Invalid operation %d: reg_addr (0x%x) not 32bit aligned", + op_type, reg_addr); + return false; + } + + for (i = 0; i < client->nr_ranges; i++) { + struct kbase_debug_coresight_csf_address_range *range = &client->addr_ranges[i]; + + if ((range->start <= reg_addr) && (reg_addr <= range->end)) + return true; + } + + dev_err(kbdev->dev, "Invalid operation %d: reg_addr (0x%x) not in client range", op_type, + reg_addr); + + return false; +} + +static bool validate_op(struct kbase_debug_coresight_csf_client *client, + struct kbase_debug_coresight_csf_op *op) +{ + struct kbase_device *kbdev; + u32 reg; + + if (!op) + return false; + + if (!client) + return false; + + kbdev = (struct kbase_device *)client->drv_data; + + switch (op->type) { + case KBASE_DEBUG_CORESIGHT_CSF_OP_TYPE_NOP: + return true; + case KBASE_DEBUG_CORESIGHT_CSF_OP_TYPE_WRITE_IMM: + if (validate_reg_addr(client, kbdev, op->op.write_imm.reg_addr, op->type)) + return true; + + break; + case KBASE_DEBUG_CORESIGHT_CSF_OP_TYPE_WRITE_IMM_RANGE: + for (reg = op->op.write_imm_range.reg_start; reg <= op->op.write_imm_range.reg_end; + reg += sizeof(u32)) { + if (!validate_reg_addr(client, kbdev, reg, op->type)) + return false; + } + + return true; + case KBASE_DEBUG_CORESIGHT_CSF_OP_TYPE_WRITE: + if (!op->op.write.ptr) { + dev_err(kbdev->dev, "Invalid operation %d: ptr not set", op->type); + break; + } + + if (validate_reg_addr(client, kbdev, 
op->op.write.reg_addr, op->type)) + return true; + + break; + case KBASE_DEBUG_CORESIGHT_CSF_OP_TYPE_READ: + if (!op->op.read.ptr) { + dev_err(kbdev->dev, "Invalid operation %d: ptr not set", op->type); + break; + } + + if (validate_reg_addr(client, kbdev, op->op.read.reg_addr, op->type)) + return true; + + break; + case KBASE_DEBUG_CORESIGHT_CSF_OP_TYPE_POLL: + if (validate_reg_addr(client, kbdev, op->op.poll.reg_addr, op->type)) + return true; + + break; + case KBASE_DEBUG_CORESIGHT_CSF_OP_TYPE_BIT_AND: + fallthrough; + case KBASE_DEBUG_CORESIGHT_CSF_OP_TYPE_BIT_OR: + fallthrough; + case KBASE_DEBUG_CORESIGHT_CSF_OP_TYPE_BIT_XOR: + fallthrough; + case KBASE_DEBUG_CORESIGHT_CSF_OP_TYPE_BIT_NOT: + if (op->op.bitw.ptr != NULL) + return true; + + dev_err(kbdev->dev, "Invalid bitwise operation pointer"); + + break; + default: + dev_err(kbdev->dev, "Invalid operation %d", op->type); + break; + } + + return false; +} + +static bool validate_seq(struct kbase_debug_coresight_csf_client *client, + struct kbase_debug_coresight_csf_sequence *seq) +{ + struct kbase_debug_coresight_csf_op *ops = seq->ops; + int nr_ops = seq->nr_ops; + int i; + + for (i = 0; i < nr_ops; i++) { + if (!validate_op(client, &ops[i])) + return false; + } + + return true; +} + +static int execute_op(struct kbase_device *kbdev, struct kbase_debug_coresight_csf_op *op) +{ + int result = -EINVAL; + u32 reg; + + dev_dbg(kbdev->dev, "Execute operation %d", op->type); + + switch (op->type) { + case KBASE_DEBUG_CORESIGHT_CSF_OP_TYPE_NOP: + result = 0; + break; + case KBASE_DEBUG_CORESIGHT_CSF_OP_TYPE_WRITE_IMM: + result = kbase_csf_firmware_mcu_register_write(kbdev, op->op.write_imm.reg_addr, + op->op.write_imm.val); + break; + case KBASE_DEBUG_CORESIGHT_CSF_OP_TYPE_WRITE_IMM_RANGE: + for (reg = op->op.write_imm_range.reg_start; reg <= op->op.write_imm_range.reg_end; + reg += sizeof(u32)) { + result = kbase_csf_firmware_mcu_register_write(kbdev, reg, + op->op.write_imm_range.val); + if (result) + break; /* abort the range on the first failed write and report its error */ + } +
break; + case KBASE_DEBUG_CORESIGHT_CSF_OP_TYPE_WRITE: + result = kbase_csf_firmware_mcu_register_write(kbdev, op->op.write.reg_addr, + *op->op.write.ptr); + break; + case KBASE_DEBUG_CORESIGHT_CSF_OP_TYPE_READ: + result = kbase_csf_firmware_mcu_register_read(kbdev, op->op.read.reg_addr, + op->op.read.ptr); + break; + case KBASE_DEBUG_CORESIGHT_CSF_OP_TYPE_POLL: + result = kbase_csf_firmware_mcu_register_poll(kbdev, op->op.poll.reg_addr, + op->op.poll.mask, op->op.poll.val); + break; + case KBASE_DEBUG_CORESIGHT_CSF_OP_TYPE_BIT_AND: + *op->op.bitw.ptr &= op->op.bitw.val; + result = 0; + break; + case KBASE_DEBUG_CORESIGHT_CSF_OP_TYPE_BIT_OR: + *op->op.bitw.ptr |= op->op.bitw.val; + result = 0; + break; + case KBASE_DEBUG_CORESIGHT_CSF_OP_TYPE_BIT_XOR: + *op->op.bitw.ptr ^= op->op.bitw.val; + result = 0; + break; + case KBASE_DEBUG_CORESIGHT_CSF_OP_TYPE_BIT_NOT: + *op->op.bitw.ptr = ~(*op->op.bitw.ptr); + result = 0; + break; + default: + dev_err(kbdev->dev, "Invalid operation %d", op->type); + break; + } + + return result; +} + +static int coresight_config_enable(struct kbase_device *kbdev, + struct kbase_debug_coresight_csf_config *config) +{ + int ret = 0; + int i; + + if (!config) + return -EINVAL; + + if (config->state == KBASE_DEBUG_CORESIGHT_CSF_ENABLED) + return ret; + + for (i = 0; config->enable_seq && !ret && i < config->enable_seq->nr_ops; i++) + ret = execute_op(kbdev, &config->enable_seq->ops[i]); + + if (!ret) { + dev_dbg(kbdev->dev, "Coresight config (0x%pK) state transition: %s to %s", config, + coresight_state_to_string(config->state), + coresight_state_to_string(KBASE_DEBUG_CORESIGHT_CSF_ENABLED)); + config->state = KBASE_DEBUG_CORESIGHT_CSF_ENABLED; + } + + /* Always assign the return code during config enable. + * It gets propagated when calling config disable. 
+ */ + config->error = ret; + + return ret; +} + +static int coresight_config_disable(struct kbase_device *kbdev, + struct kbase_debug_coresight_csf_config *config) +{ + int ret = 0; + int i; + + if (!config) + return -EINVAL; + + if (config->state == KBASE_DEBUG_CORESIGHT_CSF_DISABLED) + return ret; + + for (i = 0; config->disable_seq && !ret && i < config->disable_seq->nr_ops; i++) + ret = execute_op(kbdev, &config->disable_seq->ops[i]); + + if (!ret) { + dev_dbg(kbdev->dev, "Coresight config (0x%pK) state transition: %s to %s", config, + coresight_state_to_string(config->state), + coresight_state_to_string(KBASE_DEBUG_CORESIGHT_CSF_DISABLED)); + config->state = KBASE_DEBUG_CORESIGHT_CSF_DISABLED; + } else { + /* Only assign the error if ret is not 0. + * As we don't want to overwrite an error from config enable + */ + if (!config->error) + config->error = ret; + } + + return ret; +} + +void *kbase_debug_coresight_csf_register(void *drv_data, + struct kbase_debug_coresight_csf_address_range *ranges, + int nr_ranges) +{ + struct kbase_debug_coresight_csf_client *client, *client_entry; + struct kbase_device *kbdev; + unsigned long flags; + int k; + + if (unlikely(!drv_data)) { + pr_err("NULL drv_data"); + return NULL; + } + + kbdev = (struct kbase_device *)drv_data; + + if (unlikely(!ranges)) { + dev_err(kbdev->dev, "NULL ranges"); + return NULL; + } + + if (unlikely(!nr_ranges)) { + dev_err(kbdev->dev, "nr_ranges is 0"); + return NULL; + } + + for (k = 0; k < nr_ranges; k++) { + if (ranges[k].end < ranges[k].start) { + dev_err(kbdev->dev, "Invalid address ranges 0x%08x - 0x%08x", + ranges[k].start, ranges[k].end); + return NULL; + } + } + + client = kzalloc(sizeof(struct kbase_debug_coresight_csf_client), GFP_KERNEL); + + if (!client) + return NULL; + + spin_lock_irqsave(&kbdev->csf.coresight.lock, flags); + list_for_each_entry(client_entry, &kbdev->csf.coresight.clients, link) { + struct kbase_debug_coresight_csf_address_range *client_ranges = + 
client_entry->addr_ranges; + int i; + + for (i = 0; i < client_entry->nr_ranges; i++) { + int j; + + for (j = 0; j < nr_ranges; j++) { + if ((ranges[j].start < client_ranges[i].end) && + (client_ranges[i].start < ranges[j].end)) { + spin_unlock_irqrestore(&kbdev->csf.coresight.lock, flags); + kfree(client); + dev_err(kbdev->dev, + "Client with range 0x%08x - 0x%08x already present at address range 0x%08x - 0x%08x", + client_ranges[i].start, client_ranges[i].end, + ranges[j].start, ranges[j].end); + + return NULL; + } + } + } + } + + client->drv_data = drv_data; + client->addr_ranges = ranges; + client->nr_ranges = nr_ranges; + list_add(&client->link, &kbdev->csf.coresight.clients); + spin_unlock_irqrestore(&kbdev->csf.coresight.lock, flags); + + return client; +} +EXPORT_SYMBOL(kbase_debug_coresight_csf_register); + +void kbase_debug_coresight_csf_unregister(void *client_data) +{ + struct kbase_debug_coresight_csf_client *client; + struct kbase_debug_coresight_csf_config *config_entry; + struct kbase_device *kbdev; + unsigned long flags; + bool retry = true; + + if (unlikely(!client_data)) { + pr_err("NULL client"); + return; + } + + client = (struct kbase_debug_coresight_csf_client *)client_data; + + kbdev = (struct kbase_device *)client->drv_data; + if (unlikely(!kbdev)) { + pr_err("NULL drv_data in client"); + return; + } + + /* check for active config from client */ + spin_lock_irqsave(&kbdev->csf.coresight.lock, flags); + list_del_init(&client->link); + + while (retry && !list_empty(&kbdev->csf.coresight.configs)) { + retry = false; + list_for_each_entry(config_entry, &kbdev->csf.coresight.configs, link) { + if (config_entry->client == client) { + spin_unlock_irqrestore(&kbdev->csf.coresight.lock, flags); + kbase_debug_coresight_csf_config_free(config_entry); + spin_lock_irqsave(&kbdev->csf.coresight.lock, flags); + retry = true; + break; + } + } + } + spin_unlock_irqrestore(&kbdev->csf.coresight.lock, flags); + + kfree(client); +} 
+EXPORT_SYMBOL(kbase_debug_coresight_csf_unregister); + +void * +kbase_debug_coresight_csf_config_create(void *client_data, + struct kbase_debug_coresight_csf_sequence *enable_seq, + struct kbase_debug_coresight_csf_sequence *disable_seq) +{ + struct kbase_debug_coresight_csf_client *client; + struct kbase_debug_coresight_csf_config *config; + struct kbase_device *kbdev; + + if (unlikely(!client_data)) { + pr_err("NULL client"); + return NULL; + } + + client = (struct kbase_debug_coresight_csf_client *)client_data; + + kbdev = (struct kbase_device *)client->drv_data; + if (unlikely(!kbdev)) { + pr_err("NULL drv_data in client"); + return NULL; + } + + if (enable_seq) { + if (!validate_seq(client, enable_seq)) { + dev_err(kbdev->dev, "Invalid enable_seq"); + return NULL; + } + } + + if (disable_seq) { + if (!validate_seq(client, disable_seq)) { + dev_err(kbdev->dev, "Invalid disable_seq"); + return NULL; + } + } + + config = kzalloc(sizeof(struct kbase_debug_coresight_csf_config), GFP_KERNEL); + if (WARN_ON(!config)) /* allocation failed: config must not be dereferenced below */ + return NULL; + + config->client = client; + config->enable_seq = enable_seq; + config->disable_seq = disable_seq; + config->error = 0; + config->state = KBASE_DEBUG_CORESIGHT_CSF_DISABLED; + + INIT_LIST_HEAD(&config->link); + + return config; +} +EXPORT_SYMBOL(kbase_debug_coresight_csf_config_create); + +void kbase_debug_coresight_csf_config_free(void *config_data) +{ + struct kbase_debug_coresight_csf_config *config; + + if (unlikely(!config_data)) { + pr_err("NULL config"); + return; + } + + config = (struct kbase_debug_coresight_csf_config *)config_data; + + kbase_debug_coresight_csf_config_disable(config); + + kfree(config); +} +EXPORT_SYMBOL(kbase_debug_coresight_csf_config_free); + +int kbase_debug_coresight_csf_config_enable(void *config_data) +{ + struct kbase_debug_coresight_csf_config *config; + struct kbase_debug_coresight_csf_client *client; + struct kbase_device *kbdev; + struct kbase_debug_coresight_csf_config *config_entry; + unsigned 
long flags; + int ret = 0; + + if (unlikely(!config_data)) { + pr_err("NULL config"); + return -EINVAL; + } + + config = (struct kbase_debug_coresight_csf_config *)config_data; + client = (struct kbase_debug_coresight_csf_client *)config->client; + + if (unlikely(!client)) { + pr_err("NULL client in config"); + return -EINVAL; + } + + kbdev = (struct kbase_device *)client->drv_data; + if (unlikely(!kbdev)) { + pr_err("NULL drv_data in client"); + return -EINVAL; + } + + /* Check to prevent double entry of config */ + spin_lock_irqsave(&kbdev->csf.coresight.lock, flags); + list_for_each_entry(config_entry, &kbdev->csf.coresight.configs, link) { + if (config_entry == config) { + spin_unlock_irqrestore(&kbdev->csf.coresight.lock, flags); + dev_err(kbdev->dev, "Config already enabled"); + return -EINVAL; + } + } + spin_unlock_irqrestore(&kbdev->csf.coresight.lock, flags); + + kbase_csf_scheduler_lock(kbdev); + kbase_csf_scheduler_spin_lock(kbdev, &flags); + + /* Check the state of Scheduler to confirm the desired state of MCU */ + if (((kbdev->csf.scheduler.state != SCHED_SUSPENDED) && + (kbdev->csf.scheduler.state != SCHED_SLEEPING) && + !kbase_csf_scheduler_protected_mode_in_use(kbdev)) || + kbase_pm_get_policy(kbdev) == &kbase_pm_always_on_policy_ops) { + kbase_csf_scheduler_spin_unlock(kbdev, flags); + /* Wait for MCU to reach the stable ON state */ + ret = kbase_pm_wait_for_desired_state(kbdev); + + if (ret) + dev_err(kbdev->dev, + "Wait for PM state failed when enabling coresight config"); + else + ret = coresight_config_enable(kbdev, config); + + kbase_csf_scheduler_spin_lock(kbdev, &flags); + } + + /* Add config to next enable sequence */ + if (!ret) { + spin_lock(&kbdev->csf.coresight.lock); + list_add(&config->link, &kbdev->csf.coresight.configs); + spin_unlock(&kbdev->csf.coresight.lock); + } + + kbase_csf_scheduler_spin_unlock(kbdev, flags); + kbase_csf_scheduler_unlock(kbdev); + + return ret; +} +EXPORT_SYMBOL(kbase_debug_coresight_csf_config_enable); + 
+int kbase_debug_coresight_csf_config_disable(void *config_data) +{ + struct kbase_debug_coresight_csf_config *config; + struct kbase_debug_coresight_csf_client *client; + struct kbase_device *kbdev; + struct kbase_debug_coresight_csf_config *config_entry; + bool found_in_list = false; + unsigned long flags; + int ret = 0; + + if (unlikely(!config_data)) { + pr_err("NULL config"); + return -EINVAL; + } + + config = (struct kbase_debug_coresight_csf_config *)config_data; + + /* Exit early if not enabled prior */ + if (list_empty(&config->link)) + return ret; + + client = (struct kbase_debug_coresight_csf_client *)config->client; + + if (unlikely(!client)) { + pr_err("NULL client in config"); + return -EINVAL; + } + + kbdev = (struct kbase_device *)client->drv_data; + if (unlikely(!kbdev)) { + pr_err("NULL drv_data in client"); + return -EINVAL; + } + + /* Check if the config is in the correct list */ + spin_lock_irqsave(&kbdev->csf.coresight.lock, flags); + list_for_each_entry(config_entry, &kbdev->csf.coresight.configs, link) { + if (config_entry == config) { + found_in_list = true; + break; + } + } + spin_unlock_irqrestore(&kbdev->csf.coresight.lock, flags); + + if (!found_in_list) { + dev_err(kbdev->dev, "Config looks corrupted"); + return -EINVAL; + } + + kbase_csf_scheduler_lock(kbdev); + kbase_csf_scheduler_spin_lock(kbdev, &flags); + + /* Check the state of Scheduler to confirm the desired state of MCU */ + if (((kbdev->csf.scheduler.state != SCHED_SUSPENDED) && + (kbdev->csf.scheduler.state != SCHED_SLEEPING) && + !kbase_csf_scheduler_protected_mode_in_use(kbdev)) || + kbase_pm_get_policy(kbdev) == &kbase_pm_always_on_policy_ops) { + kbase_csf_scheduler_spin_unlock(kbdev, flags); + /* Wait for MCU to reach the stable ON state */ + ret = kbase_pm_wait_for_desired_state(kbdev); + + if (ret) + dev_err(kbdev->dev, + "Wait for PM state failed when disabling coresight config"); + else + ret = coresight_config_disable(kbdev, config); + + 
kbase_csf_scheduler_spin_lock(kbdev, &flags); + } else if (kbdev->pm.backend.mcu_state == KBASE_MCU_OFF) { + /* MCU is OFF, so the disable sequence was already executed. + * + * Propagate any error that would have occurred during the enable + * or disable sequence. + * + * This is done as part of the disable sequence, since the call from + * client is synchronous. + */ + ret = config->error; + } + + /* Remove config from next disable sequence */ + spin_lock(&kbdev->csf.coresight.lock); + list_del_init(&config->link); + spin_unlock(&kbdev->csf.coresight.lock); + + kbase_csf_scheduler_spin_unlock(kbdev, flags); + kbase_csf_scheduler_unlock(kbdev); + + return ret; +} +EXPORT_SYMBOL(kbase_debug_coresight_csf_config_disable); + +static void coresight_config_enable_all(struct work_struct *data) +{ + struct kbase_device *kbdev = + container_of(data, struct kbase_device, csf.coresight.enable_work); + struct kbase_debug_coresight_csf_config *config_entry; + unsigned long flags; + + spin_lock_irqsave(&kbdev->csf.coresight.lock, flags); + + list_for_each_entry(config_entry, &kbdev->csf.coresight.configs, link) { + spin_unlock_irqrestore(&kbdev->csf.coresight.lock, flags); + if (coresight_config_enable(kbdev, config_entry)) + dev_err(kbdev->dev, "enable config (0x%pK) failed", config_entry); + spin_lock_irqsave(&kbdev->csf.coresight.lock, flags); + } + + spin_unlock_irqrestore(&kbdev->csf.coresight.lock, flags); + + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + kbase_pm_update_state(kbdev); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + + wake_up_all(&kbdev->csf.coresight.event_wait); +} + +static void coresight_config_disable_all(struct work_struct *data) +{ + struct kbase_device *kbdev = + container_of(data, struct kbase_device, csf.coresight.disable_work); + struct kbase_debug_coresight_csf_config *config_entry; + unsigned long flags; + + spin_lock_irqsave(&kbdev->csf.coresight.lock, flags); + + list_for_each_entry(config_entry, 
&kbdev->csf.coresight.configs, link) { + spin_unlock_irqrestore(&kbdev->csf.coresight.lock, flags); + if (coresight_config_disable(kbdev, config_entry)) + dev_err(kbdev->dev, "disable config (0x%pK) failed", config_entry); + spin_lock_irqsave(&kbdev->csf.coresight.lock, flags); + } + + spin_unlock_irqrestore(&kbdev->csf.coresight.lock, flags); + + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + kbase_pm_update_state(kbdev); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + + wake_up_all(&kbdev->csf.coresight.event_wait); +} + +void kbase_debug_coresight_csf_disable_pmode_enter(struct kbase_device *kbdev) +{ + unsigned long flags; + + dev_dbg(kbdev->dev, "Coresight state %s before protected mode enter", + coresight_state_to_string(KBASE_DEBUG_CORESIGHT_CSF_ENABLED)); + + lockdep_assert_held(&kbdev->csf.scheduler.lock); + + kbase_pm_lock(kbdev); + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + + kbdev->csf.coresight.disable_on_pmode_enter = true; + kbdev->csf.coresight.enable_on_pmode_exit = false; + kbase_pm_update_state(kbdev); + + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + + kbase_pm_wait_for_desired_state(kbdev); + + kbase_pm_unlock(kbdev); +} + +void kbase_debug_coresight_csf_enable_pmode_exit(struct kbase_device *kbdev) +{ + dev_dbg(kbdev->dev, "Coresight state %s after protected mode exit", + coresight_state_to_string(KBASE_DEBUG_CORESIGHT_CSF_DISABLED)); + + lockdep_assert_held(&kbdev->hwaccess_lock); + + WARN_ON(kbdev->csf.coresight.disable_on_pmode_enter); + + kbdev->csf.coresight.enable_on_pmode_exit = true; + kbase_pm_update_state(kbdev); +} + +void kbase_debug_coresight_csf_state_request(struct kbase_device *kbdev, + enum kbase_debug_coresight_csf_state state) +{ + if (unlikely(!kbdev)) + return; + + if (unlikely(!kbdev->csf.coresight.workq)) + return; + + dev_dbg(kbdev->dev, "Coresight state %s requested", coresight_state_to_string(state)); + + switch (state) { + case KBASE_DEBUG_CORESIGHT_CSF_DISABLED: + 
queue_work(kbdev->csf.coresight.workq, &kbdev->csf.coresight.disable_work); + break; + case KBASE_DEBUG_CORESIGHT_CSF_ENABLED: + queue_work(kbdev->csf.coresight.workq, &kbdev->csf.coresight.enable_work); + break; + default: + dev_err(kbdev->dev, "Invalid Coresight state %d", state); + break; + } +} + +bool kbase_debug_coresight_csf_state_check(struct kbase_device *kbdev, + enum kbase_debug_coresight_csf_state state) +{ + struct kbase_debug_coresight_csf_config *config_entry; + unsigned long flags; + bool success = true; + + dev_dbg(kbdev->dev, "Coresight check for state: %s", coresight_state_to_string(state)); + + spin_lock_irqsave(&kbdev->csf.coresight.lock, flags); + + list_for_each_entry(config_entry, &kbdev->csf.coresight.configs, link) { + if (state != config_entry->state) { + success = false; + break; + } + } + + spin_unlock_irqrestore(&kbdev->csf.coresight.lock, flags); + + return success; +} +KBASE_EXPORT_TEST_API(kbase_debug_coresight_csf_state_check); + +bool kbase_debug_coresight_csf_state_wait(struct kbase_device *kbdev, + enum kbase_debug_coresight_csf_state state) +{ + const long wait_timeout = kbase_csf_timeout_in_jiffies(kbdev->csf.fw_timeout_ms); + struct kbase_debug_coresight_csf_config *config_entry, *next_config_entry; + unsigned long flags; + bool success = true; + + dev_dbg(kbdev->dev, "Coresight wait for state: %s", coresight_state_to_string(state)); + + spin_lock_irqsave(&kbdev->csf.coresight.lock, flags); + + list_for_each_entry_safe(config_entry, next_config_entry, &kbdev->csf.coresight.configs, + link) { + const enum kbase_debug_coresight_csf_state prev_state = config_entry->state; + long remaining; + + spin_unlock_irqrestore(&kbdev->csf.coresight.lock, flags); + remaining = wait_event_timeout(kbdev->csf.coresight.event_wait, + state == config_entry->state, wait_timeout); + spin_lock_irqsave(&kbdev->csf.coresight.lock, flags); + + if (!remaining) { + success = false; + dev_err(kbdev->dev, + "Timeout waiting for Coresight state transition 
%s to %s", + coresight_state_to_string(prev_state), + coresight_state_to_string(state)); + } + } + + spin_unlock_irqrestore(&kbdev->csf.coresight.lock, flags); + + return success; +} +KBASE_EXPORT_TEST_API(kbase_debug_coresight_csf_state_wait); + +int kbase_debug_coresight_csf_init(struct kbase_device *kbdev) +{ + kbdev->csf.coresight.workq = alloc_ordered_workqueue("Mali CoreSight workqueue", 0); + if (kbdev->csf.coresight.workq == NULL) + return -ENOMEM; + + INIT_LIST_HEAD(&kbdev->csf.coresight.clients); + INIT_LIST_HEAD(&kbdev->csf.coresight.configs); + INIT_WORK(&kbdev->csf.coresight.enable_work, coresight_config_enable_all); + INIT_WORK(&kbdev->csf.coresight.disable_work, coresight_config_disable_all); + init_waitqueue_head(&kbdev->csf.coresight.event_wait); + spin_lock_init(&kbdev->csf.coresight.lock); + + kbdev->csf.coresight.disable_on_pmode_enter = false; + kbdev->csf.coresight.enable_on_pmode_exit = false; + + return 0; +} + +void kbase_debug_coresight_csf_term(struct kbase_device *kbdev) +{ + struct kbase_debug_coresight_csf_client *client_entry, *next_client_entry; + struct kbase_debug_coresight_csf_config *config_entry, *next_config_entry; + unsigned long flags; + + kbdev->csf.coresight.disable_on_pmode_enter = false; + kbdev->csf.coresight.enable_on_pmode_exit = false; + + cancel_work_sync(&kbdev->csf.coresight.enable_work); + cancel_work_sync(&kbdev->csf.coresight.disable_work); + destroy_workqueue(kbdev->csf.coresight.workq); + kbdev->csf.coresight.workq = NULL; + + spin_lock_irqsave(&kbdev->csf.coresight.lock, flags); + + list_for_each_entry_safe(config_entry, next_config_entry, &kbdev->csf.coresight.configs, + link) { + list_del_init(&config_entry->link); + kfree(config_entry); + } + + list_for_each_entry_safe(client_entry, next_client_entry, &kbdev->csf.coresight.clients, + link) { + list_del_init(&client_entry->link); + kfree(client_entry); + } + + spin_unlock_irqrestore(&kbdev->csf.coresight.lock, flags); +} diff --git 
a/drivers/gpu/arm/bifrost/debug/backend/mali_kbase_debug_coresight_internal_csf.h b/drivers/gpu/arm/bifrost/debug/backend/mali_kbase_debug_coresight_internal_csf.h new file mode 100644 index 000000000000..06d62dc70182 --- /dev/null +++ b/drivers/gpu/arm/bifrost/debug/backend/mali_kbase_debug_coresight_internal_csf.h @@ -0,0 +1,182 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * + * (C) COPYRIGHT 2022 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + */ + +#ifndef _KBASE_DEBUG_CORESIGHT_INTERNAL_CSF_H_ +#define _KBASE_DEBUG_CORESIGHT_INTERNAL_CSF_H_ + +#include +#include + +/** + * struct kbase_debug_coresight_csf_client - Coresight client definition + * + * @drv_data: Pointer to driver device data. + * @addr_ranges: Arrays of address ranges used by the registered client. + * @nr_ranges: Size of @addr_ranges array. + * @link: Link item of a Coresight client. + * Linked to &struct_kbase_device.csf.coresight.clients. + */ +struct kbase_debug_coresight_csf_client { + void *drv_data; + struct kbase_debug_coresight_csf_address_range *addr_ranges; + u32 nr_ranges; + struct list_head link; +}; + +/** + * enum kbase_debug_coresight_csf_state - Coresight configuration states + * + * @KBASE_DEBUG_CORESIGHT_CSF_DISABLED: Coresight configuration is disabled. 
+ * @KBASE_DEBUG_CORESIGHT_CSF_ENABLED: Coresight configuration is enabled. + */ +enum kbase_debug_coresight_csf_state { + KBASE_DEBUG_CORESIGHT_CSF_DISABLED = 0, + KBASE_DEBUG_CORESIGHT_CSF_ENABLED, +}; + +/** + * struct kbase_debug_coresight_csf_config - Coresight configuration definition + * + * @client: Pointer to the client for which the configuration is created. + * @enable_seq: Array of operations for Coresight client enable sequence. Can be NULL. + * @disable_seq: Array of operations for Coresight client disable sequence. Can be NULL. + * @state: Current Coresight configuration state. + * @error: Error code used to know if an error occurred during the execution + * of the enable or disable sequences. + * @link: Link item of a Coresight configuration. + * Linked to &struct_kbase_device.csf.coresight.configs. + */ +struct kbase_debug_coresight_csf_config { + void *client; + struct kbase_debug_coresight_csf_sequence *enable_seq; + struct kbase_debug_coresight_csf_sequence *disable_seq; + enum kbase_debug_coresight_csf_state state; + int error; + struct list_head link; +}; + +/** + * struct kbase_debug_coresight_device - Object representing the Coresight device + * + * @clients: List head to maintain Coresight clients. + * @configs: List head to maintain Coresight configs. + * @lock: A lock to protect client/config lists. + * Lists can be accessed concurrently by + * Coresight kernel modules and kernel threads. + * @workq: Work queue for Coresight enable/disable execution. + * @enable_work: Work item used to enable Coresight. + * @disable_work: Work item used to disable Coresight. + * @event_wait: Wait queue for Coresight events. + * @enable_on_pmode_exit: Flag used by the PM state machine to + * identify if Coresight enable is needed. + * @disable_on_pmode_enter: Flag used by the PM state machine to + * identify if Coresight disable is needed. 
+ */ +struct kbase_debug_coresight_device { + struct list_head clients; + struct list_head configs; + spinlock_t lock; + struct workqueue_struct *workq; + struct work_struct enable_work; + struct work_struct disable_work; + wait_queue_head_t event_wait; + bool enable_on_pmode_exit; + bool disable_on_pmode_enter; +}; + +/** + * kbase_debug_coresight_csf_init - Initialize Coresight resources. + * + * @kbdev: Instance of a GPU platform device that implements a CSF interface. + * + * This function should be called once at device initialization. + * + * Return: 0 on success, -ENOMEM if the workqueue could not be allocated. + */ +int kbase_debug_coresight_csf_init(struct kbase_device *kbdev); + +/** + * kbase_debug_coresight_csf_term - Terminate Coresight resources. + * + * @kbdev: Instance of a GPU platform device that implements a CSF interface. + * + * This function should be called at device termination to prevent any + * memory leaks if Coresight module would have been removed without calling + * kbasep_debug_coresight_csf_trace_disable(). + */ +void kbase_debug_coresight_csf_term(struct kbase_device *kbdev); + +/** + * kbase_debug_coresight_csf_disable_pmode_enter - Disable Coresight on Protected + * mode enter. + * + * @kbdev: Instance of a GPU platform device that implements a CSF interface. + * + * This function should be called just before requesting to enter protected mode. + * It will trigger a PM state machine transition from MCU_ON + * to ON_PMODE_ENTER_CORESIGHT_DISABLE. + */ +void kbase_debug_coresight_csf_disable_pmode_enter(struct kbase_device *kbdev); + +/** + * kbase_debug_coresight_csf_enable_pmode_exit - Enable Coresight on Protected + * mode exit. + * + * @kbdev: Instance of a GPU platform device that implements a CSF interface. + * + * This function should be called after protected mode exit is acknowledged. + * It will trigger a PM state machine transition from MCU_ON + * to ON_PMODE_EXIT_CORESIGHT_ENABLE. 
+ */ +void kbase_debug_coresight_csf_enable_pmode_exit(struct kbase_device *kbdev); + +/** + * kbase_debug_coresight_csf_state_request - Request Coresight state transition. + * + * @kbdev: Instance of a GPU platform device that implements a CSF interface. + * @state: Coresight state to request. + */ +void kbase_debug_coresight_csf_state_request(struct kbase_device *kbdev, + enum kbase_debug_coresight_csf_state state); + +/** + * kbase_debug_coresight_csf_state_check - Check Coresight state. + * + * @kbdev: Instance of a GPU platform device that implements a CSF interface. + * @state: Coresight state to check for. + * + * Return: true if all states of configs are @state. + */ +bool kbase_debug_coresight_csf_state_check(struct kbase_device *kbdev, + enum kbase_debug_coresight_csf_state state); + +/** + * kbase_debug_coresight_csf_state_wait - Wait for Coresight state transition to complete. + * + * @kbdev: Instance of a GPU platform device that implements a CSF interface. + * @state: Coresight state to wait for. + * + * Return: true if all configs become @state in pre-defined time period. 
+ */ +bool kbase_debug_coresight_csf_state_wait(struct kbase_device *kbdev, + enum kbase_debug_coresight_csf_state state); + +#endif /* _KBASE_DEBUG_CORESIGHT_INTERNAL_CSF_H_ */ diff --git a/drivers/gpu/arm/bifrost/device/backend/mali_kbase_device_csf.c b/drivers/gpu/arm/bifrost/device/backend/mali_kbase_device_csf.c index 277569381292..e123b3ac57ac 100644 --- a/drivers/gpu/arm/bifrost/device/backend/mali_kbase_device_csf.c +++ b/drivers/gpu/arm/bifrost/device/backend/mali_kbase_device_csf.c @@ -29,10 +29,7 @@ #include #include #include - -#if IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI) #include -#endif #include #include @@ -92,13 +89,13 @@ static int kbase_backend_late_init(struct kbase_device *kbdev) goto fail_timer; #ifdef CONFIG_MALI_BIFROST_DEBUG -#ifndef CONFIG_MALI_BIFROST_NO_MALI +#if IS_ENABLED(CONFIG_MALI_REAL_HW) if (kbasep_common_test_interrupt_handlers(kbdev) != 0) { dev_err(kbdev->dev, "Interrupt assignment check failed.\n"); err = -EINVAL; goto fail_interrupt_test; } -#endif /* !CONFIG_MALI_BIFROST_NO_MALI */ +#endif /* IS_ENABLED(CONFIG_MALI_REAL_HW) */ #endif /* CONFIG_MALI_BIFROST_DEBUG */ kbase_ipa_control_init(kbdev); @@ -142,9 +139,9 @@ fail_pm_metrics_init: kbase_ipa_control_term(kbdev); #ifdef CONFIG_MALI_BIFROST_DEBUG -#ifndef CONFIG_MALI_BIFROST_NO_MALI +#if IS_ENABLED(CONFIG_MALI_REAL_HW) fail_interrupt_test: -#endif /* !CONFIG_MALI_BIFROST_NO_MALI */ +#endif /* IS_ENABLED(CONFIG_MALI_REAL_HW) */ #endif /* CONFIG_MALI_BIFROST_DEBUG */ kbase_backend_timer_term(kbdev); @@ -283,12 +280,13 @@ static void kbase_device_hwcnt_backend_csf_term(struct kbase_device *kbdev) } static const struct kbase_device_init dev_init[] = { -#if IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI) - { kbase_gpu_device_create, kbase_gpu_device_destroy, "Dummy model initialization failed" }, -#else +#if !IS_ENABLED(CONFIG_MALI_REAL_HW) + { kbase_gpu_device_create, kbase_gpu_device_destroy, + "Dummy model initialization failed" }, +#else /* !IS_ENABLED(CONFIG_MALI_REAL_HW) */ { 
assign_irqs, NULL, "IRQ search failed" }, { registers_map, registers_unmap, "Register map failed" }, -#endif +#endif /* !IS_ENABLED(CONFIG_MALI_REAL_HW) */ { power_control_init, power_control_term, "Power control initialization failed" }, { kbase_device_io_history_init, kbase_device_io_history_term, "Register access history initialization failed" }, @@ -344,6 +342,10 @@ static const struct kbase_device_init dev_init[] = { { kbase_gpuprops_populate_user_buffer, kbase_gpuprops_free_user_buffer, "GPU property population failed" }, { kbase_device_late_init, kbase_device_late_term, "Late device initialization failed" }, +#if IS_ENABLED(CONFIG_MALI_CORESIGHT) + { kbase_debug_coresight_csf_init, kbase_debug_coresight_csf_term, + "Coresight initialization failed" }, +#endif /* IS_ENABLED(CONFIG_MALI_CORESIGHT) */ }; static void kbase_device_term_partial(struct kbase_device *kbdev, diff --git a/drivers/gpu/arm/bifrost/device/backend/mali_kbase_device_hw_csf.c b/drivers/gpu/arm/bifrost/device/backend/mali_kbase_device_hw_csf.c index 3b792968a7d7..2abd62aaa8b1 100644 --- a/drivers/gpu/arm/bifrost/device/backend/mali_kbase_device_hw_csf.c +++ b/drivers/gpu/arm/bifrost/device/backend/mali_kbase_device_hw_csf.c @@ -24,6 +24,7 @@ #include #include #include +#include #include #include #include @@ -149,9 +150,6 @@ void kbase_gpu_interrupt(struct kbase_device *kbdev, u32 val) dev_dbg(kbdev->dev, "Doorbell mirror interrupt received"); spin_lock_irqsave(&kbdev->hwaccess_lock, flags); -#ifdef CONFIG_MALI_BIFROST_DEBUG - WARN_ON(!kbase_csf_scheduler_get_nr_active_csgs(kbdev)); -#endif kbase_pm_disable_db_mirror_interrupt(kbdev); kbdev->pm.backend.exit_gpu_sleep_mode = true; kbase_csf_scheduler_invoke_tick(kbdev); @@ -189,7 +187,7 @@ void kbase_gpu_interrupt(struct kbase_device *kbdev, u32 val) } #if !IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI) -static bool kbase_is_register_accessible(u32 offset) +bool kbase_is_register_accessible(u32 offset) { #ifdef CONFIG_MALI_BIFROST_DEBUG if (((offset 
>= MCU_SUBSYSTEM_BASE) && (offset < IPA_CONTROL_BASE)) || @@ -201,7 +199,9 @@ static bool kbase_is_register_accessible(u32 offset) return true; } +#endif /* !IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI) */ +#if IS_ENABLED(CONFIG_MALI_REAL_HW) void kbase_reg_write(struct kbase_device *kbdev, u32 offset, u32 value) { if (WARN_ON(!kbdev->pm.backend.gpu_powered)) diff --git a/drivers/gpu/arm/bifrost/device/backend/mali_kbase_device_hw_jm.c b/drivers/gpu/arm/bifrost/device/backend/mali_kbase_device_hw_jm.c index 52063fb0f533..38223af213d1 100644 --- a/drivers/gpu/arm/bifrost/device/backend/mali_kbase_device_hw_jm.c +++ b/drivers/gpu/arm/bifrost/device/backend/mali_kbase_device_hw_jm.c @@ -106,7 +106,7 @@ void kbase_gpu_interrupt(struct kbase_device *kbdev, u32 val) KBASE_KTRACE_ADD(kbdev, CORE_GPU_IRQ_DONE, NULL, val); } -#if !IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI) +#if IS_ENABLED(CONFIG_MALI_REAL_HW) void kbase_reg_write(struct kbase_device *kbdev, u32 offset, u32 value) { WARN_ON(!kbdev->pm.backend.gpu_powered); @@ -140,4 +140,4 @@ u32 kbase_reg_read(struct kbase_device *kbdev, u32 offset) return val; } KBASE_EXPORT_TEST_API(kbase_reg_read); -#endif /* !IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI) */ +#endif /* IS_ENABLED(CONFIG_MALI_REAL_HW) */ diff --git a/drivers/gpu/arm/bifrost/device/backend/mali_kbase_device_jm.c b/drivers/gpu/arm/bifrost/device/backend/mali_kbase_device_jm.c index 129b4e430c52..6f0ec7d933c4 100644 --- a/drivers/gpu/arm/bifrost/device/backend/mali_kbase_device_jm.c +++ b/drivers/gpu/arm/bifrost/device/backend/mali_kbase_device_jm.c @@ -30,10 +30,7 @@ #include #include #include - -#if IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI) #include -#endif /* CONFIG_MALI_BIFROST_NO_MALI */ #ifdef CONFIG_MALI_ARBITER_SUPPORT #include @@ -74,13 +71,13 @@ static int kbase_backend_late_init(struct kbase_device *kbdev) goto fail_timer; #ifdef CONFIG_MALI_BIFROST_DEBUG -#ifndef CONFIG_MALI_BIFROST_NO_MALI +#if IS_ENABLED(CONFIG_MALI_REAL_HW) if 
(kbasep_common_test_interrupt_handlers(kbdev) != 0) { dev_err(kbdev->dev, "Interrupt assignment check failed.\n"); err = -EINVAL; goto fail_interrupt_test; } -#endif /* !CONFIG_MALI_BIFROST_NO_MALI */ +#endif /* IS_ENABLED(CONFIG_MALI_REAL_HW) */ #endif /* CONFIG_MALI_BIFROST_DEBUG */ err = kbase_job_slot_init(kbdev); @@ -119,9 +116,9 @@ fail_devfreq_init: fail_job_slot: #ifdef CONFIG_MALI_BIFROST_DEBUG -#ifndef CONFIG_MALI_BIFROST_NO_MALI +#if IS_ENABLED(CONFIG_MALI_REAL_HW) fail_interrupt_test: -#endif /* !CONFIG_MALI_BIFROST_NO_MALI */ +#endif /* IS_ENABLED(CONFIG_MALI_REAL_HW) */ #endif /* CONFIG_MALI_BIFROST_DEBUG */ kbase_backend_timer_term(kbdev); @@ -213,12 +210,13 @@ static void kbase_device_hwcnt_backend_jm_watchdog_term(struct kbase_device *kbd } static const struct kbase_device_init dev_init[] = { -#if IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI) - { kbase_gpu_device_create, kbase_gpu_device_destroy, "Dummy model initialization failed" }, -#else +#if !IS_ENABLED(CONFIG_MALI_REAL_HW) + { kbase_gpu_device_create, kbase_gpu_device_destroy, + "Dummy model initialization failed" }, +#else /* !IS_ENABLED(CONFIG_MALI_REAL_HW) */ { assign_irqs, NULL, "IRQ search failed" }, { registers_map, registers_unmap, "Register map failed" }, -#endif +#endif /* !IS_ENABLED(CONFIG_MALI_REAL_HW) */ { kbase_device_io_history_init, kbase_device_io_history_term, "Register access history initialization failed" }, { kbase_device_pm_init, kbase_device_pm_term, "Power management initialization failed" }, diff --git a/drivers/gpu/arm/bifrost/device/mali_kbase_device.c b/drivers/gpu/arm/bifrost/device/mali_kbase_device.c index 053400bd63f0..fb3e4176395e 100644 --- a/drivers/gpu/arm/bifrost/device/mali_kbase_device.c +++ b/drivers/gpu/arm/bifrost/device/mali_kbase_device.c @@ -328,6 +328,9 @@ int kbase_device_misc_init(struct kbase_device * const kbdev) kbdev->num_of_atoms_hw_completed = 0; #endif +#if MALI_USE_CSF && IS_ENABLED(CONFIG_SYNC_FILE) + atomic_set(&kbdev->live_fence_metadata, 
0); +#endif return 0; term_as: @@ -351,6 +354,11 @@ void kbase_device_misc_term(struct kbase_device *kbdev) if (kbdev->oom_notifier_block.notifier_call) unregister_oom_notifier(&kbdev->oom_notifier_block); + +#if MALI_USE_CSF && IS_ENABLED(CONFIG_SYNC_FILE) + if (atomic_read(&kbdev->live_fence_metadata) > 0) + dev_warn(kbdev->dev, "Terminating Kbase device with live fence metadata!"); +#endif } #if !MALI_USE_CSF diff --git a/drivers/gpu/arm/bifrost/device/mali_kbase_device_internal.h b/drivers/gpu/arm/bifrost/device/mali_kbase_device_internal.h index 36b4698ca2f8..2c1c6ecec15f 100644 --- a/drivers/gpu/arm/bifrost/device/mali_kbase_device_internal.h +++ b/drivers/gpu/arm/bifrost/device/mali_kbase_device_internal.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -99,3 +99,13 @@ int kbase_device_late_init(struct kbase_device *kbdev); * @kbdev: Device pointer */ void kbase_device_late_term(struct kbase_device *kbdev); + +#if MALI_USE_CSF && !IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI) +/** + * kbase_is_register_accessible - Checks if register is accessible + * @offset: Register offset + * + * Return: true if the register is accessible, false otherwise. 
+ */ +bool kbase_is_register_accessible(u32 offset); +#endif /* MALI_USE_CSF && !IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI) */ diff --git a/drivers/gpu/arm/bifrost/hwcnt/backend/mali_kbase_hwcnt_backend_csf_if_fw.c b/drivers/gpu/arm/bifrost/hwcnt/backend/mali_kbase_hwcnt_backend_csf_if_fw.c index 9985752a3748..f412531ab03a 100644 --- a/drivers/gpu/arm/bifrost/hwcnt/backend/mali_kbase_hwcnt_backend_csf_if_fw.c +++ b/drivers/gpu/arm/bifrost/hwcnt/backend/mali_kbase_hwcnt_backend_csf_if_fw.c @@ -34,13 +34,11 @@ #include "hwcnt/backend/mali_kbase_hwcnt_backend_csf_if_fw.h" #include "mali_kbase_hwaccess_time.h" #include "backend/gpu/mali_kbase_clk_rate_trace_mgr.h" +#include #include #include "mali_kbase_ccswe.h" -#if IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI) -#include -#endif /* CONFIG_MALI_BIFROST_NO_MALI */ /* Ring buffer virtual address start at 4GB */ #define KBASE_HWC_CSF_RING_BUFFER_VA_START (1ull << 32) @@ -103,6 +101,8 @@ kbasep_hwcnt_backend_csf_if_fw_assert_lock_held(struct kbase_hwcnt_backend_csf_i static void kbasep_hwcnt_backend_csf_if_fw_lock(struct kbase_hwcnt_backend_csf_if_ctx *ctx, unsigned long *flags) + __acquires(&(struct kbase_hwcnt_backend_csf_if_fw_ctx) + ctx->kbdev->csf.scheduler.interrupt_lock) { struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx; struct kbase_device *kbdev; @@ -117,6 +117,8 @@ static void kbasep_hwcnt_backend_csf_if_fw_lock(struct kbase_hwcnt_backend_csf_i static void kbasep_hwcnt_backend_csf_if_fw_unlock(struct kbase_hwcnt_backend_csf_if_ctx *ctx, unsigned long flags) + __releases(&(struct kbase_hwcnt_backend_csf_if_fw_ctx) + ctx->kbdev->csf.scheduler.interrupt_lock) { struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx; struct kbase_device *kbdev; @@ -345,7 +347,7 @@ static int kbasep_hwcnt_backend_csf_if_fw_ring_buf_alloc( /* Update MMU table */ ret = kbase_mmu_insert_pages(kbdev, &kbdev->csf.mcu_mmu, gpu_va_base >> PAGE_SHIFT, phys, num_pages, flags, MCU_AS_NR, KBASE_MEM_GROUP_CSF_FW, - mmu_sync_info); + mmu_sync_info, NULL, 
false); if (ret) goto mmu_insert_failed; @@ -480,7 +482,7 @@ kbasep_hwcnt_backend_csf_if_fw_ring_buf_free(struct kbase_hwcnt_backend_csf_if_c WARN_ON(kbase_mmu_teardown_pages(fw_ctx->kbdev, &fw_ctx->kbdev->csf.mcu_mmu, gpu_va_base >> PAGE_SHIFT, fw_ring_buf->phys, - fw_ring_buf->num_pages, MCU_AS_NR)); + fw_ring_buf->num_pages, MCU_AS_NR, true)); vunmap(fw_ring_buf->cpu_dump_base); diff --git a/drivers/gpu/arm/bifrost/hwcnt/backend/mali_kbase_hwcnt_backend_jm.c b/drivers/gpu/arm/bifrost/hwcnt/backend/mali_kbase_hwcnt_backend_jm.c index 9d9889a0e426..669701c29152 100644 --- a/drivers/gpu/arm/bifrost/hwcnt/backend/mali_kbase_hwcnt_backend_jm.c +++ b/drivers/gpu/arm/bifrost/hwcnt/backend/mali_kbase_hwcnt_backend_jm.c @@ -27,10 +27,7 @@ #include "mali_kbase_hwaccess_instr.h" #include "mali_kbase_hwaccess_time.h" #include "mali_kbase_ccswe.h" - -#if IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI) -#include "backend/gpu/mali_kbase_model_dummy.h" -#endif /* CONFIG_MALI_BIFROST_NO_MALI */ +#include "backend/gpu/mali_kbase_model_linux.h" #include "backend/gpu/mali_kbase_clk_rate_trace_mgr.h" #include "backend/gpu/mali_kbase_pm_internal.h" diff --git a/drivers/gpu/arm/bifrost/ipa/backend/mali_kbase_ipa_counter_csf.c b/drivers/gpu/arm/bifrost/ipa/backend/mali_kbase_ipa_counter_csf.c index 43cdf18a5e3b..21b4e52884c5 100644 --- a/drivers/gpu/arm/bifrost/ipa/backend/mali_kbase_ipa_counter_csf.c +++ b/drivers/gpu/arm/bifrost/ipa/backend/mali_kbase_ipa_counter_csf.c @@ -23,10 +23,13 @@ #include "mali_kbase.h" /* MEMSYS counter block offsets */ +#define L2_RD_MSG_IN_CU (13) #define L2_RD_MSG_IN (16) #define L2_WR_MSG_IN (18) +#define L2_SNP_MSG_IN (20) #define L2_RD_MSG_OUT (22) #define L2_READ_LOOKUP (26) +#define L2_EXT_READ_NOSNP (30) #define L2_EXT_WRITE_NOSNP_FULL (43) /* SC counter block offsets */ @@ -36,17 +39,23 @@ #define FULL_QUAD_WARPS (21) #define EXEC_INSTR_FMA (27) #define EXEC_INSTR_CVT (28) +#define EXEC_INSTR_SFU (29) #define EXEC_INSTR_MSG (30) #define TEX_FILT_NUM_OPS 
(39) #define LS_MEM_READ_SHORT (45) #define LS_MEM_WRITE_SHORT (47) #define VARY_SLOT_16 (51) +#define BEATS_RD_LSC_EXT (57) +#define BEATS_RD_TEX (58) +#define BEATS_RD_TEX_EXT (59) +#define FRAG_QUADS_COARSE (68) /* Tiler counter block offsets */ #define IDVS_POS_SHAD_STALL (23) #define PREFETCH_STALL (25) #define VFETCH_POS_READ_WAIT (29) #define VFETCH_VERTEX_WAIT (30) +#define PRIMASSY_STALL (32) #define IDVS_VAR_SHAD_STALL (38) #define ITER_STALL (40) #define PMGR_PTR_RD_STALL (48) @@ -111,6 +120,15 @@ static const struct kbase_ipa_counter ipa_top_level_cntrs_def_ttux[] = { TILER_COUNTER_DEF("vfetch_vertex_wait", -391964, VFETCH_VERTEX_WAIT), }; +static const struct kbase_ipa_counter ipa_top_level_cntrs_def_ttix[] = { + TILER_COUNTER_DEF("primassy_stall", 471953, PRIMASSY_STALL), + TILER_COUNTER_DEF("idvs_var_shad_stall", -460559, IDVS_VAR_SHAD_STALL), + + MEMSYS_COUNTER_DEF("l2_rd_msg_in_cu", -6189604, L2_RD_MSG_IN_CU), + MEMSYS_COUNTER_DEF("l2_snp_msg_in", 6289609, L2_SNP_MSG_IN), + MEMSYS_COUNTER_DEF("l2_ext_read_nosnp", 512341, L2_EXT_READ_NOSNP), +}; + /* These tables provide a description of each performance counter * used by the shader cores counter model for energy estimation. 
*/ @@ -150,6 +168,17 @@ static const struct kbase_ipa_counter ipa_shader_core_cntrs_def_ttux[] = { SC_COUNTER_DEF("frag_quads_ezs_update", 372032, FRAG_QUADS_EZS_UPDATE), }; +static const struct kbase_ipa_counter ipa_shader_core_cntrs_def_ttix[] = { + SC_COUNTER_DEF("exec_instr_fma", 192642, EXEC_INSTR_FMA), + SC_COUNTER_DEF("exec_instr_msg", 1326465, EXEC_INSTR_MSG), + SC_COUNTER_DEF("beats_rd_tex", 163518, BEATS_RD_TEX), + SC_COUNTER_DEF("beats_rd_lsc_ext", 127475, BEATS_RD_LSC_EXT), + SC_COUNTER_DEF("frag_quads_coarse", -36247, FRAG_QUADS_COARSE), + SC_COUNTER_DEF("ls_mem_write_short", 51547, LS_MEM_WRITE_SHORT), + SC_COUNTER_DEF("beats_rd_tex_ext", -43370, BEATS_RD_TEX_EXT), + SC_COUNTER_DEF("exec_instr_sfu", 31583, EXEC_INSTR_SFU), +}; + #define IPA_POWER_MODEL_OPS(gpu, init_token) \ const struct kbase_ipa_model_ops kbase_ ## gpu ## _ipa_model_ops = { \ .name = "mali-" #gpu "-power-model", \ @@ -181,13 +210,13 @@ static const struct kbase_ipa_counter ipa_shader_core_cntrs_def_ttux[] = { #define ALIAS_POWER_MODEL(gpu, as_gpu) \ IPA_POWER_MODEL_OPS(gpu, as_gpu) -/* Reference voltage value is 750 mV. - */ +/* Reference voltage value is 750 mV. */ STANDARD_POWER_MODEL(todx, 750); STANDARD_POWER_MODEL(tgrx, 750); STANDARD_POWER_MODEL(tvax, 750); - STANDARD_POWER_MODEL(ttux, 750); +/* Reference voltage value is 550 mV. 
*/ +STANDARD_POWER_MODEL(ttix, 550); /* Assuming LODX is an alias of TODX for IPA */ ALIAS_POWER_MODEL(lodx, todx); @@ -195,10 +224,14 @@ ALIAS_POWER_MODEL(lodx, todx); /* Assuming LTUX is an alias of TTUX for IPA */ ALIAS_POWER_MODEL(ltux, ttux); +/* Assuming LTIX is an alias of TTIX for IPA */ +ALIAS_POWER_MODEL(ltix, ttix); + static const struct kbase_ipa_model_ops *ipa_counter_model_ops[] = { &kbase_todx_ipa_model_ops, &kbase_lodx_ipa_model_ops, &kbase_tgrx_ipa_model_ops, &kbase_tvax_ipa_model_ops, - &kbase_ttux_ipa_model_ops, &kbase_ltux_ipa_model_ops + &kbase_ttux_ipa_model_ops, &kbase_ltux_ipa_model_ops, + &kbase_ttix_ipa_model_ops, &kbase_ltix_ipa_model_ops, }; const struct kbase_ipa_model_ops *kbase_ipa_counter_model_ops_find( @@ -237,6 +270,10 @@ const char *kbase_ipa_counter_model_name_from_id(u32 gpu_id) return "mali-ttux-power-model"; case GPU_ID2_PRODUCT_LTUX: return "mali-ltux-power-model"; + case GPU_ID2_PRODUCT_TTIX: + return "mali-ttix-power-model"; + case GPU_ID2_PRODUCT_LTIX: + return "mali-ltix-power-model"; default: return NULL; } diff --git a/drivers/gpu/arm/bifrost/ipa/backend/mali_kbase_ipa_counter_jm.c b/drivers/gpu/arm/bifrost/ipa/backend/mali_kbase_ipa_counter_jm.c index a32a2c207163..cc61f642399c 100644 --- a/drivers/gpu/arm/bifrost/ipa/backend/mali_kbase_ipa_counter_jm.c +++ b/drivers/gpu/arm/bifrost/ipa/backend/mali_kbase_ipa_counter_jm.c @@ -23,10 +23,7 @@ #include "mali_kbase_ipa_counter_common_jm.h" #include "mali_kbase.h" - -#if IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI) -#include -#endif /* CONFIG_MALI_BIFROST_NO_MALI */ +#include /* Performance counter blocks base offsets */ #define JM_BASE (0 * KBASE_IPA_NR_BYTES_PER_BLOCK) diff --git a/drivers/gpu/arm/bifrost/ipa/mali_kbase_ipa.c b/drivers/gpu/arm/bifrost/ipa/mali_kbase_ipa.c index d15e98a54c40..b2e6bc459f22 100644 --- a/drivers/gpu/arm/bifrost/ipa/mali_kbase_ipa.c +++ b/drivers/gpu/arm/bifrost/ipa/mali_kbase_ipa.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH
Linux-syscall-note /* * - * (C) COPYRIGHT 2016-2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2016-2022 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -84,11 +84,11 @@ KBASE_EXPORT_TEST_API(kbase_ipa_model_name_from_id); static struct device_node *get_model_dt_node(struct kbase_ipa_model *model, bool dt_required) { - struct device_node *model_dt_node; + struct device_node *model_dt_node = NULL; char compat_string[64]; - snprintf(compat_string, sizeof(compat_string), "arm,%s", - model->ops->name); + if (unlikely(!scnprintf(compat_string, sizeof(compat_string), "arm,%s", model->ops->name))) + return NULL; /* of_find_compatible_node() will call of_node_put() on the root node, * so take a reference on it first. @@ -111,12 +111,12 @@ int kbase_ipa_model_add_param_s32(struct kbase_ipa_model *model, const char *name, s32 *addr, size_t num_elems, bool dt_required) { - int err, i; + int err = -EINVAL, i; struct device_node *model_dt_node = get_model_dt_node(model, dt_required); char *origin; - err = of_property_read_u32_array(model_dt_node, name, addr, num_elems); + err = of_property_read_u32_array(model_dt_node, name, (u32 *)addr, num_elems); /* We're done with model_dt_node now, so drop the reference taken in * get_model_dt_node()/of_find_compatible_node(). 
*/ @@ -138,11 +138,17 @@ int kbase_ipa_model_add_param_s32(struct kbase_ipa_model *model, for (i = 0; i < num_elems; ++i) { char elem_name[32]; - if (num_elems == 1) - snprintf(elem_name, sizeof(elem_name), "%s", name); - else - snprintf(elem_name, sizeof(elem_name), "%s.%d", - name, i); + if (num_elems == 1) { + if (unlikely(!scnprintf(elem_name, sizeof(elem_name), "%s", name))) { + err = -ENOMEM; + goto exit; + } + } else { + if (unlikely(!scnprintf(elem_name, sizeof(elem_name), "%s.%d", name, i))) { + err = -ENOMEM; + goto exit; + } + } dev_dbg(model->kbdev->dev, "%s.%s = %d (%s)\n", model->ops->name, elem_name, addr[i], origin); @@ -164,7 +170,7 @@ int kbase_ipa_model_add_param_string(struct kbase_ipa_model *model, int err; struct device_node *model_dt_node = get_model_dt_node(model, dt_required); - const char *string_prop_value; + const char *string_prop_value = ""; char *origin; err = of_property_read_string(model_dt_node, name, diff --git a/drivers/gpu/arm/bifrost/ipa/mali_kbase_ipa_simple.c b/drivers/gpu/arm/bifrost/ipa/mali_kbase_ipa_simple.c index 57508eb24749..8557fe8723cf 100644 --- a/drivers/gpu/arm/bifrost/ipa/mali_kbase_ipa_simple.c +++ b/drivers/gpu/arm/bifrost/ipa/mali_kbase_ipa_simple.c @@ -236,14 +236,12 @@ static int add_params(struct kbase_ipa_model *model) (struct kbase_ipa_model_simple_data *)model->model_data; err = kbase_ipa_model_add_param_s32(model, "static-coefficient", - &model_data->static_coefficient, - 1, true); + (s32 *)&model_data->static_coefficient, 1, true); if (err) goto end; err = kbase_ipa_model_add_param_s32(model, "dynamic-coefficient", - &model_data->dynamic_coefficient, - 1, true); + (s32 *)&model_data->dynamic_coefficient, 1, true); if (err) goto end; diff --git a/drivers/gpu/arm/bifrost/jm/mali_kbase_jm_defs.h b/drivers/gpu/arm/bifrost/jm/mali_kbase_jm_defs.h index e4316981e635..fe8995aefc37 100644 --- a/drivers/gpu/arm/bifrost/jm/mali_kbase_jm_defs.h +++ b/drivers/gpu/arm/bifrost/jm/mali_kbase_jm_defs.h @@ -578,7 
+578,7 @@ struct kbase_jd_atom { #if IS_ENABLED(CONFIG_GPU_TRACEPOINTS) int work_id; #endif - int slot_nr; + unsigned int slot_nr; u32 atom_flags; diff --git a/drivers/gpu/arm/bifrost/jm/mali_kbase_jm_js.h b/drivers/gpu/arm/bifrost/jm/mali_kbase_jm_js.h index d03bcc0f27d8..53819caaf616 100644 --- a/drivers/gpu/arm/bifrost/jm/mali_kbase_jm_js.h +++ b/drivers/gpu/arm/bifrost/jm/mali_kbase_jm_js.h @@ -132,15 +132,15 @@ void kbasep_js_kctx_term(struct kbase_context *kctx); * Atoms of higher priority might still be able to be pulled from the context * on @js. This helps with starting a high priority atom as soon as possible. */ -static inline void kbase_jsctx_slot_prio_blocked_set(struct kbase_context *kctx, - int js, int sched_prio) +static inline void kbase_jsctx_slot_prio_blocked_set(struct kbase_context *kctx, unsigned int js, + int sched_prio) { struct kbase_jsctx_slot_tracking *slot_tracking = &kctx->slot_tracking[js]; lockdep_assert_held(&kctx->kbdev->hwaccess_lock); WARN(!slot_tracking->atoms_pulled_pri[sched_prio], - "When marking slot %d as blocked for priority %d on a kctx, no atoms were pulled - the slot cannot become unblocked", + "When marking slot %u as blocked for priority %d on a kctx, no atoms were pulled - the slot cannot become unblocked", js, sched_prio); slot_tracking->blocked |= ((kbase_js_prio_bitmap_t)1) << sched_prio; @@ -509,19 +509,6 @@ void kbasep_js_resume(struct kbase_device *kbdev); bool kbase_js_dep_resolved_submit(struct kbase_context *kctx, struct kbase_jd_atom *katom); -/** - * jsctx_ll_flush_to_rb() - Pushes atoms from the linked list to ringbuffer. - * @kctx: Context Pointer - * @prio: Priority (specifies the queue together with js). - * @js: Job slot (specifies the queue together with prio). - * - * Pushes all possible atoms from the linked list to the ringbuffer. - * Number of atoms are limited to free space in the ringbuffer and - * number of available atoms in the linked list. 
- * - */ -void jsctx_ll_flush_to_rb(struct kbase_context *kctx, int prio, int js); - /** * kbase_js_pull - Pull an atom from a context in the job scheduler for * execution. @@ -536,7 +523,7 @@ void jsctx_ll_flush_to_rb(struct kbase_context *kctx, int prio, int js); * Return: a pointer to an atom, or NULL if there are no atoms for this * slot that can be currently run. */ -struct kbase_jd_atom *kbase_js_pull(struct kbase_context *kctx, int js); +struct kbase_jd_atom *kbase_js_pull(struct kbase_context *kctx, unsigned int js); /** * kbase_js_unpull - Return an atom to the job scheduler ringbuffer. @@ -617,7 +604,7 @@ bool kbase_js_atom_blocked_on_x_dep(struct kbase_jd_atom *katom); * been used. * */ -void kbase_js_sched(struct kbase_device *kbdev, int js_mask); +void kbase_js_sched(struct kbase_device *kbdev, unsigned int js_mask); /** * kbase_js_zap_context - Attempt to deschedule a context that is being diff --git a/drivers/gpu/arm/bifrost/mali_base_hwconfig_features.h b/drivers/gpu/arm/bifrost/mali_base_hwconfig_features.h index ea143ab49642..c6fea791b8c9 100644 --- a/drivers/gpu/arm/bifrost/mali_base_hwconfig_features.h +++ b/drivers/gpu/arm/bifrost/mali_base_hwconfig_features.h @@ -131,16 +131,6 @@ __attribute__((unused)) static const enum base_hw_feature base_hw_features_tBAx[ BASE_HW_FEATURE_END }; -__attribute__((unused)) static const enum base_hw_feature base_hw_features_tDUx[] = { - BASE_HW_FEATURE_FLUSH_REDUCTION, - BASE_HW_FEATURE_PROTECTED_DEBUG_MODE, - BASE_HW_FEATURE_IDVS_GROUP_SIZE, - BASE_HW_FEATURE_L2_CONFIG, - BASE_HW_FEATURE_CLEAN_ONLY_SAFE, - BASE_HW_FEATURE_FLUSH_INV_SHADER_OTHER, - BASE_HW_FEATURE_END -}; - __attribute__((unused)) static const enum base_hw_feature base_hw_features_tODx[] = { BASE_HW_FEATURE_FLUSH_REDUCTION, BASE_HW_FEATURE_PROTECTED_DEBUG_MODE, diff --git a/drivers/gpu/arm/bifrost/mali_base_hwconfig_issues.h b/drivers/gpu/arm/bifrost/mali_base_hwconfig_issues.h index a360984acca5..2dc0402197de 100644 --- 
a/drivers/gpu/arm/bifrost/mali_base_hwconfig_issues.h +++ b/drivers/gpu/arm/bifrost/mali_base_hwconfig_issues.h @@ -64,6 +64,9 @@ enum base_hw_issue { BASE_HW_ISSUE_TURSEHW_2716, BASE_HW_ISSUE_GPU2019_3901, BASE_HW_ISSUE_GPU2021PRO_290, + BASE_HW_ISSUE_TITANHW_2710, + BASE_HW_ISSUE_TITANHW_2679, + BASE_HW_ISSUE_GPU2022PRO_148, BASE_HW_ISSUE_END }; @@ -88,6 +91,8 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_tMIx_r0p0 BASE_HW_ISSUE_TSIX_2033, BASE_HW_ISSUE_TTRX_921, BASE_HW_ISSUE_GPU2017_1336, + BASE_HW_ISSUE_TITANHW_2710, + BASE_HW_ISSUE_GPU2022PRO_148, BASE_HW_ISSUE_END }; @@ -108,6 +113,8 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_tMIx_r0p0 BASE_HW_ISSUE_TSIX_2033, BASE_HW_ISSUE_TTRX_921, BASE_HW_ISSUE_GPU2017_1336, + BASE_HW_ISSUE_TITANHW_2710, + BASE_HW_ISSUE_GPU2022PRO_148, BASE_HW_ISSUE_END }; @@ -128,6 +135,8 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_tMIx_r0p1 BASE_HW_ISSUE_TSIX_2033, BASE_HW_ISSUE_TTRX_921, BASE_HW_ISSUE_GPU2017_1336, + BASE_HW_ISSUE_TITANHW_2710, + BASE_HW_ISSUE_GPU2022PRO_148, BASE_HW_ISSUE_END }; @@ -143,6 +152,8 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_model_tMI BASE_HW_ISSUE_TMIX_8343, BASE_HW_ISSUE_TMIX_8456, BASE_HW_ISSUE_TSIX_2033, + BASE_HW_ISSUE_TITANHW_2710, + BASE_HW_ISSUE_GPU2022PRO_148, BASE_HW_ISSUE_END }; @@ -156,6 +167,8 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_tHEx_r0p0 BASE_HW_ISSUE_TSIX_2033, BASE_HW_ISSUE_TTRX_921, BASE_HW_ISSUE_GPU2017_1336, + BASE_HW_ISSUE_TITANHW_2710, + BASE_HW_ISSUE_GPU2022PRO_148, BASE_HW_ISSUE_END }; @@ -169,6 +182,8 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_tHEx_r0p1 BASE_HW_ISSUE_TSIX_2033, BASE_HW_ISSUE_TTRX_921, BASE_HW_ISSUE_GPU2017_1336, + BASE_HW_ISSUE_TITANHW_2710, + BASE_HW_ISSUE_GPU2022PRO_148, BASE_HW_ISSUE_END }; @@ -182,6 +197,8 @@ __attribute__((unused)) static const enum base_hw_issue 
base_hw_issues_tHEx_r0p2 BASE_HW_ISSUE_TSIX_2033, BASE_HW_ISSUE_TTRX_921, BASE_HW_ISSUE_GPU2017_1336, + BASE_HW_ISSUE_TITANHW_2710, + BASE_HW_ISSUE_GPU2022PRO_148, BASE_HW_ISSUE_END }; @@ -194,6 +211,8 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_tHEx_r0p3 BASE_HW_ISSUE_TSIX_2033, BASE_HW_ISSUE_TTRX_921, BASE_HW_ISSUE_GPU2017_1336, + BASE_HW_ISSUE_TITANHW_2710, + BASE_HW_ISSUE_GPU2022PRO_148, BASE_HW_ISSUE_END }; @@ -204,6 +223,8 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_model_tHE BASE_HW_ISSUE_TMIX_8042, BASE_HW_ISSUE_TMIX_8133, BASE_HW_ISSUE_TSIX_2033, + BASE_HW_ISSUE_TITANHW_2710, + BASE_HW_ISSUE_GPU2022PRO_148, BASE_HW_ISSUE_END }; @@ -217,6 +238,8 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_tSIx_r0p0 BASE_HW_ISSUE_TTRX_921, BASE_HW_ISSUE_GPU2017_1336, BASE_HW_ISSUE_TTRX_3464, + BASE_HW_ISSUE_TITANHW_2710, + BASE_HW_ISSUE_GPU2022PRO_148, BASE_HW_ISSUE_END }; @@ -230,6 +253,8 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_tSIx_r0p1 BASE_HW_ISSUE_TTRX_921, BASE_HW_ISSUE_GPU2017_1336, BASE_HW_ISSUE_TTRX_3464, + BASE_HW_ISSUE_TITANHW_2710, + BASE_HW_ISSUE_GPU2022PRO_148, BASE_HW_ISSUE_END }; @@ -242,6 +267,8 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_tSIx_r1p0 BASE_HW_ISSUE_TTRX_921, BASE_HW_ISSUE_GPU2017_1336, BASE_HW_ISSUE_TTRX_3464, + BASE_HW_ISSUE_TITANHW_2710, + BASE_HW_ISSUE_GPU2022PRO_148, BASE_HW_ISSUE_END }; @@ -253,6 +280,8 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_tSIx_r1p1 BASE_HW_ISSUE_TTRX_921, BASE_HW_ISSUE_GPU2017_1336, BASE_HW_ISSUE_TTRX_3464, + BASE_HW_ISSUE_TITANHW_2710, + BASE_HW_ISSUE_GPU2022PRO_148, BASE_HW_ISSUE_END }; @@ -263,6 +292,8 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_model_tSI BASE_HW_ISSUE_TSIX_1116, BASE_HW_ISSUE_TSIX_2033, BASE_HW_ISSUE_TTRX_3464, + BASE_HW_ISSUE_TITANHW_2710, + BASE_HW_ISSUE_GPU2022PRO_148, 
BASE_HW_ISSUE_END }; @@ -274,6 +305,8 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_tDVx_r0p0 BASE_HW_ISSUE_TTRX_921, BASE_HW_ISSUE_GPU2017_1336, BASE_HW_ISSUE_TTRX_3464, + BASE_HW_ISSUE_TITANHW_2710, + BASE_HW_ISSUE_GPU2022PRO_148, BASE_HW_ISSUE_END }; @@ -284,6 +317,8 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_model_tDV BASE_HW_ISSUE_TSIX_1116, BASE_HW_ISSUE_TSIX_2033, BASE_HW_ISSUE_TTRX_3464, + BASE_HW_ISSUE_TITANHW_2710, + BASE_HW_ISSUE_GPU2022PRO_148, BASE_HW_ISSUE_END }; @@ -296,6 +331,8 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_tNOx_r0p0 BASE_HW_ISSUE_TTRX_921, BASE_HW_ISSUE_GPU2017_1336, BASE_HW_ISSUE_TTRX_3464, + BASE_HW_ISSUE_TITANHW_2710, + BASE_HW_ISSUE_GPU2022PRO_148, BASE_HW_ISSUE_END }; @@ -306,6 +343,8 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_model_tNO BASE_HW_ISSUE_TSIX_1116, BASE_HW_ISSUE_TSIX_2033, BASE_HW_ISSUE_TTRX_3464, + BASE_HW_ISSUE_TITANHW_2710, + BASE_HW_ISSUE_GPU2022PRO_148, BASE_HW_ISSUE_END }; @@ -318,6 +357,8 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_tGOx_r0p0 BASE_HW_ISSUE_TTRX_921, BASE_HW_ISSUE_GPU2017_1336, BASE_HW_ISSUE_TTRX_3464, + BASE_HW_ISSUE_TITANHW_2710, + BASE_HW_ISSUE_GPU2022PRO_148, BASE_HW_ISSUE_END }; @@ -330,6 +371,8 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_tGOx_r1p0 BASE_HW_ISSUE_TTRX_921, BASE_HW_ISSUE_GPU2017_1336, BASE_HW_ISSUE_TTRX_3464, + BASE_HW_ISSUE_TITANHW_2710, + BASE_HW_ISSUE_GPU2022PRO_148, BASE_HW_ISSUE_END }; @@ -340,6 +383,8 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_model_tGO BASE_HW_ISSUE_TSIX_1116, BASE_HW_ISSUE_TSIX_2033, BASE_HW_ISSUE_TTRX_3464, + BASE_HW_ISSUE_TITANHW_2710, + BASE_HW_ISSUE_GPU2022PRO_148, BASE_HW_ISSUE_END }; @@ -356,6 +401,8 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_tTRx_r0p0 BASE_HW_ISSUE_TTRX_3470, BASE_HW_ISSUE_TTRX_3464, 
BASE_HW_ISSUE_TTRX_3485, + BASE_HW_ISSUE_TITANHW_2710, + BASE_HW_ISSUE_GPU2022PRO_148, BASE_HW_ISSUE_END }; @@ -372,6 +419,8 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_tTRx_r0p1 BASE_HW_ISSUE_TTRX_3470, BASE_HW_ISSUE_TTRX_3464, BASE_HW_ISSUE_TTRX_3485, + BASE_HW_ISSUE_TITANHW_2710, + BASE_HW_ISSUE_GPU2022PRO_148, BASE_HW_ISSUE_END }; @@ -387,6 +436,8 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_tTRx_r0p2 BASE_HW_ISSUE_TTRX_3083, BASE_HW_ISSUE_TTRX_3470, BASE_HW_ISSUE_TTRX_3464, + BASE_HW_ISSUE_TITANHW_2710, + BASE_HW_ISSUE_GPU2022PRO_148, BASE_HW_ISSUE_END }; @@ -399,6 +450,8 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_model_tTR BASE_HW_ISSUE_TTRX_3083, BASE_HW_ISSUE_TTRX_3470, BASE_HW_ISSUE_TTRX_3464, + BASE_HW_ISSUE_TITANHW_2710, + BASE_HW_ISSUE_GPU2022PRO_148, BASE_HW_ISSUE_END }; @@ -415,6 +468,8 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_tNAx_r0p0 BASE_HW_ISSUE_TTRX_3470, BASE_HW_ISSUE_TTRX_3464, BASE_HW_ISSUE_TTRX_3485, + BASE_HW_ISSUE_TITANHW_2710, + BASE_HW_ISSUE_GPU2022PRO_148, BASE_HW_ISSUE_END }; @@ -430,6 +485,8 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_tNAx_r0p1 BASE_HW_ISSUE_TTRX_3083, BASE_HW_ISSUE_TTRX_3470, BASE_HW_ISSUE_TTRX_3464, + BASE_HW_ISSUE_TITANHW_2710, + BASE_HW_ISSUE_GPU2022PRO_148, BASE_HW_ISSUE_END }; @@ -442,6 +499,8 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_model_tNA BASE_HW_ISSUE_TTRX_3083, BASE_HW_ISSUE_TTRX_3470, BASE_HW_ISSUE_TTRX_3464, + BASE_HW_ISSUE_TITANHW_2710, + BASE_HW_ISSUE_GPU2022PRO_148, BASE_HW_ISSUE_END }; @@ -456,6 +515,8 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_tBEx_r0p0 BASE_HW_ISSUE_TTRX_3470, BASE_HW_ISSUE_TTRX_3464, BASE_HW_ISSUE_TTRX_3485, + BASE_HW_ISSUE_TITANHW_2710, + BASE_HW_ISSUE_GPU2022PRO_148, BASE_HW_ISSUE_END }; @@ -469,6 +530,8 @@ __attribute__((unused)) static const enum base_hw_issue 
base_hw_issues_tBEx_r0p1 BASE_HW_ISSUE_TTRX_3083, BASE_HW_ISSUE_TTRX_3470, BASE_HW_ISSUE_TTRX_3464, + BASE_HW_ISSUE_TITANHW_2710, + BASE_HW_ISSUE_GPU2022PRO_148, BASE_HW_ISSUE_END }; @@ -482,6 +545,8 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_tBEx_r1p0 BASE_HW_ISSUE_TTRX_3083, BASE_HW_ISSUE_TTRX_3470, BASE_HW_ISSUE_TTRX_3464, + BASE_HW_ISSUE_TITANHW_2710, + BASE_HW_ISSUE_GPU2022PRO_148, BASE_HW_ISSUE_END }; @@ -495,6 +560,8 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_tBEx_r1p1 BASE_HW_ISSUE_TTRX_3083, BASE_HW_ISSUE_TTRX_3470, BASE_HW_ISSUE_TTRX_3464, + BASE_HW_ISSUE_TITANHW_2710, + BASE_HW_ISSUE_GPU2022PRO_148, BASE_HW_ISSUE_END }; @@ -507,6 +574,8 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_model_tBE BASE_HW_ISSUE_TTRX_3083, BASE_HW_ISSUE_TTRX_3470, BASE_HW_ISSUE_TTRX_3464, + BASE_HW_ISSUE_TITANHW_2710, + BASE_HW_ISSUE_GPU2022PRO_148, BASE_HW_ISSUE_END }; @@ -521,6 +590,8 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_lBEx_r1p0 BASE_HW_ISSUE_TTRX_3470, BASE_HW_ISSUE_TTRX_3464, BASE_HW_ISSUE_TTRX_3485, + BASE_HW_ISSUE_TITANHW_2710, + BASE_HW_ISSUE_GPU2022PRO_148, BASE_HW_ISSUE_END }; @@ -534,6 +605,8 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_lBEx_r1p1 BASE_HW_ISSUE_TTRX_3083, BASE_HW_ISSUE_TTRX_3470, BASE_HW_ISSUE_TTRX_3464, + BASE_HW_ISSUE_TITANHW_2710, + BASE_HW_ISSUE_GPU2022PRO_148, BASE_HW_ISSUE_END }; @@ -547,6 +620,8 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_tBAx_r0p0 BASE_HW_ISSUE_TTRX_3083, BASE_HW_ISSUE_TTRX_3470, BASE_HW_ISSUE_TTRX_3464, + BASE_HW_ISSUE_TITANHW_2710, + BASE_HW_ISSUE_GPU2022PRO_148, BASE_HW_ISSUE_END }; @@ -560,6 +635,8 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_tBAx_r1p0 BASE_HW_ISSUE_TTRX_3083, BASE_HW_ISSUE_TTRX_3470, BASE_HW_ISSUE_TTRX_3464, + BASE_HW_ISSUE_TITANHW_2710, + BASE_HW_ISSUE_GPU2022PRO_148, BASE_HW_ISSUE_END }; @@ 
-572,90 +649,74 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_model_tBA BASE_HW_ISSUE_TTRX_3083, BASE_HW_ISSUE_TTRX_3470, BASE_HW_ISSUE_TTRX_3464, - BASE_HW_ISSUE_END -}; - -__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tDUx_r0p0[] = { - BASE_HW_ISSUE_9435, - BASE_HW_ISSUE_TSIX_2033, - BASE_HW_ISSUE_TTRX_1337, - BASE_HW_ISSUE_TTRX_921, - BASE_HW_ISSUE_TTRX_3414, - BASE_HW_ISSUE_TTRX_3083, - BASE_HW_ISSUE_END -}; - -__attribute__((unused)) static const enum base_hw_issue base_hw_issues_model_tDUx[] = { - BASE_HW_ISSUE_5736, - BASE_HW_ISSUE_9435, - BASE_HW_ISSUE_TSIX_2033, - BASE_HW_ISSUE_TTRX_1337, - BASE_HW_ISSUE_TTRX_3414, - BASE_HW_ISSUE_TTRX_3083, + BASE_HW_ISSUE_TITANHW_2710, + BASE_HW_ISSUE_GPU2022PRO_148, BASE_HW_ISSUE_END }; __attribute__((unused)) static const enum base_hw_issue base_hw_issues_tODx_r0p0[] = { - BASE_HW_ISSUE_9435, BASE_HW_ISSUE_TSIX_2033, BASE_HW_ISSUE_TTRX_1337, BASE_HW_ISSUE_GPU2019_3212, BASE_HW_ISSUE_GPU2019_3878, BASE_HW_ISSUE_GPU2019_3901, + BASE_HW_ISSUE_TITANHW_2710, + BASE_HW_ISSUE_GPU2022PRO_148, BASE_HW_ISSUE_END }; __attribute__((unused)) static const enum base_hw_issue base_hw_issues_model_tODx[] = { - BASE_HW_ISSUE_5736, - BASE_HW_ISSUE_9435, BASE_HW_ISSUE_TSIX_2033, BASE_HW_ISSUE_TTRX_1337, BASE_HW_ISSUE_GPU2019_3212, BASE_HW_ISSUE_GPU2019_3878, BASE_HW_ISSUE_GPU2019_3901, + BASE_HW_ISSUE_TITANHW_2710, + BASE_HW_ISSUE_GPU2022PRO_148, BASE_HW_ISSUE_END }; __attribute__((unused)) static const enum base_hw_issue base_hw_issues_tGRx_r0p0[] = { - BASE_HW_ISSUE_9435, BASE_HW_ISSUE_TSIX_2033, BASE_HW_ISSUE_TTRX_1337, BASE_HW_ISSUE_GPU2019_3878, BASE_HW_ISSUE_GPU2019_3901, + BASE_HW_ISSUE_TITANHW_2710, + BASE_HW_ISSUE_GPU2022PRO_148, BASE_HW_ISSUE_END }; __attribute__((unused)) static const enum base_hw_issue base_hw_issues_model_tGRx[] = { - BASE_HW_ISSUE_5736, - BASE_HW_ISSUE_9435, BASE_HW_ISSUE_TSIX_2033, BASE_HW_ISSUE_TTRX_1337, BASE_HW_ISSUE_GPU2019_3878, 
BASE_HW_ISSUE_GPU2019_3901, + BASE_HW_ISSUE_TITANHW_2710, + BASE_HW_ISSUE_GPU2022PRO_148, BASE_HW_ISSUE_END }; __attribute__((unused)) static const enum base_hw_issue base_hw_issues_tVAx_r0p0[] = { - BASE_HW_ISSUE_9435, BASE_HW_ISSUE_TSIX_2033, BASE_HW_ISSUE_TTRX_1337, BASE_HW_ISSUE_GPU2019_3878, BASE_HW_ISSUE_GPU2019_3901, + BASE_HW_ISSUE_TITANHW_2710, + BASE_HW_ISSUE_GPU2022PRO_148, BASE_HW_ISSUE_END }; __attribute__((unused)) static const enum base_hw_issue base_hw_issues_model_tVAx[] = { - BASE_HW_ISSUE_5736, - BASE_HW_ISSUE_9435, BASE_HW_ISSUE_TSIX_2033, BASE_HW_ISSUE_TTRX_1337, BASE_HW_ISSUE_GPU2019_3878, BASE_HW_ISSUE_GPU2019_3901, + BASE_HW_ISSUE_TITANHW_2710, + BASE_HW_ISSUE_GPU2022PRO_148, BASE_HW_ISSUE_END }; __attribute__((unused)) static const enum base_hw_issue base_hw_issues_tTUx_r0p0[] = { - BASE_HW_ISSUE_9435, BASE_HW_ISSUE_TSIX_2033, BASE_HW_ISSUE_TTRX_1337, BASE_HW_ISSUE_TURSEHW_1997, @@ -663,70 +724,97 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_tTUx_r0p0 BASE_HW_ISSUE_TURSEHW_2716, BASE_HW_ISSUE_GPU2019_3901, BASE_HW_ISSUE_GPU2021PRO_290, + BASE_HW_ISSUE_TITANHW_2710, + BASE_HW_ISSUE_TITANHW_2679, + BASE_HW_ISSUE_GPU2022PRO_148, + BASE_HW_ISSUE_END +}; + +__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tTUx_r0p1[] = { + BASE_HW_ISSUE_TSIX_2033, + BASE_HW_ISSUE_TTRX_1337, + BASE_HW_ISSUE_TURSEHW_1997, + BASE_HW_ISSUE_GPU2019_3878, + BASE_HW_ISSUE_TURSEHW_2716, + BASE_HW_ISSUE_GPU2019_3901, + BASE_HW_ISSUE_GPU2021PRO_290, + BASE_HW_ISSUE_TITANHW_2710, + BASE_HW_ISSUE_TITANHW_2679, + BASE_HW_ISSUE_GPU2022PRO_148, BASE_HW_ISSUE_END }; __attribute__((unused)) static const enum base_hw_issue base_hw_issues_model_tTUx[] = { - BASE_HW_ISSUE_5736, - BASE_HW_ISSUE_9435, BASE_HW_ISSUE_TSIX_2033, BASE_HW_ISSUE_TTRX_1337, BASE_HW_ISSUE_GPU2019_3878, BASE_HW_ISSUE_TURSEHW_2716, BASE_HW_ISSUE_GPU2019_3901, BASE_HW_ISSUE_GPU2021PRO_290, + BASE_HW_ISSUE_TITANHW_2710, + BASE_HW_ISSUE_TITANHW_2679, + 
BASE_HW_ISSUE_GPU2022PRO_148, BASE_HW_ISSUE_END }; __attribute__((unused)) static const enum base_hw_issue base_hw_issues_tTUx_r1p0[] = { - BASE_HW_ISSUE_9435, BASE_HW_ISSUE_TSIX_2033, BASE_HW_ISSUE_TTRX_1337, BASE_HW_ISSUE_GPU2019_3878, BASE_HW_ISSUE_TURSEHW_2716, BASE_HW_ISSUE_GPU2019_3901, BASE_HW_ISSUE_GPU2021PRO_290, + BASE_HW_ISSUE_TITANHW_2710, + BASE_HW_ISSUE_TITANHW_2679, + BASE_HW_ISSUE_GPU2022PRO_148, BASE_HW_ISSUE_END }; __attribute__((unused)) static const enum base_hw_issue base_hw_issues_tTUx_r1p1[] = { - BASE_HW_ISSUE_9435, BASE_HW_ISSUE_TSIX_2033, BASE_HW_ISSUE_TTRX_1337, BASE_HW_ISSUE_GPU2019_3878, BASE_HW_ISSUE_TURSEHW_2716, BASE_HW_ISSUE_GPU2019_3901, BASE_HW_ISSUE_GPU2021PRO_290, + BASE_HW_ISSUE_TITANHW_2710, + BASE_HW_ISSUE_TITANHW_2679, + BASE_HW_ISSUE_GPU2022PRO_148, BASE_HW_ISSUE_END }; __attribute__((unused)) static const enum base_hw_issue base_hw_issues_tTUx_r1p2[] = { - BASE_HW_ISSUE_9435, BASE_HW_ISSUE_TSIX_2033, BASE_HW_ISSUE_TTRX_1337, BASE_HW_ISSUE_GPU2019_3878, BASE_HW_ISSUE_TURSEHW_2716, BASE_HW_ISSUE_GPU2019_3901, BASE_HW_ISSUE_GPU2021PRO_290, + BASE_HW_ISSUE_TITANHW_2710, + BASE_HW_ISSUE_TITANHW_2679, + BASE_HW_ISSUE_GPU2022PRO_148, BASE_HW_ISSUE_END }; __attribute__((unused)) static const enum base_hw_issue base_hw_issues_model_tTIx[] = { - BASE_HW_ISSUE_5736, - BASE_HW_ISSUE_9435, BASE_HW_ISSUE_TSIX_2033, BASE_HW_ISSUE_TTRX_1337, BASE_HW_ISSUE_TURSEHW_2716, BASE_HW_ISSUE_GPU2021PRO_290, + BASE_HW_ISSUE_TITANHW_2710, + BASE_HW_ISSUE_TITANHW_2679, + BASE_HW_ISSUE_GPU2022PRO_148, BASE_HW_ISSUE_END }; __attribute__((unused)) static const enum base_hw_issue base_hw_issues_tTIx_r0p0[] = { - BASE_HW_ISSUE_9435, BASE_HW_ISSUE_TSIX_2033, BASE_HW_ISSUE_TTRX_1337, BASE_HW_ISSUE_TURSEHW_2716, BASE_HW_ISSUE_GPU2021PRO_290, + BASE_HW_ISSUE_TITANHW_2710, + BASE_HW_ISSUE_TITANHW_2679, + BASE_HW_ISSUE_GPU2022PRO_148, BASE_HW_ISSUE_END }; diff --git a/drivers/gpu/arm/bifrost/mali_kbase.h b/drivers/gpu/arm/bifrost/mali_kbase.h index 
8e4d36141368..542e8f63fb5b 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase.h +++ b/drivers/gpu/arm/bifrost/mali_kbase.h @@ -339,21 +339,8 @@ int kbase_job_slot_softstop_start_rp(struct kbase_context *kctx, void kbase_job_slot_softstop(struct kbase_device *kbdev, int js, struct kbase_jd_atom *target_katom); -void kbase_job_slot_softstop_swflags(struct kbase_device *kbdev, int js, - struct kbase_jd_atom *target_katom, u32 sw_flags); - -/** - * kbase_job_slot_hardstop - Hard-stop the specified job slot - * @kctx: The kbase context that contains the job(s) that should - * be hard-stopped - * @js: The job slot to hard-stop - * @target_katom: The job that should be hard-stopped (or NULL for all - * jobs from the context) - * Context: - * The job slot lock must be held when calling this function. - */ -void kbase_job_slot_hardstop(struct kbase_context *kctx, int js, - struct kbase_jd_atom *target_katom); +void kbase_job_slot_softstop_swflags(struct kbase_device *kbdev, unsigned int js, + struct kbase_jd_atom *target_katom, u32 sw_flags); /** * kbase_job_check_enter_disjoint - potentiall enter disjoint mode @@ -448,19 +435,6 @@ static inline void kbase_free_user_buffer( } } -/** - * kbase_mem_copy_from_extres() - Copy from external resources. - * - * @kctx: kbase context within which the copying is to take place. - * @buf_data: Pointer to the information about external resources: - * pages pertaining to the external resource, number of - * pages to copy. - * - * Return: 0 on success, error code otherwise. 
- */ -int kbase_mem_copy_from_extres(struct kbase_context *kctx, - struct kbase_debug_copy_buffer *buf_data); - #if !MALI_USE_CSF int kbase_process_soft_job(struct kbase_jd_atom *katom); int kbase_prepare_soft_job(struct kbase_jd_atom *katom); diff --git a/drivers/gpu/arm/bifrost/mali_kbase_as_fault_debugfs.c b/drivers/gpu/arm/bifrost/mali_kbase_as_fault_debugfs.c index 08a9a3cd0479..10dbeee02e40 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_as_fault_debugfs.c +++ b/drivers/gpu/arm/bifrost/mali_kbase_as_fault_debugfs.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2016-2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2016-2022 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -98,11 +98,9 @@ void kbase_as_fault_debugfs_init(struct kbase_device *kbdev) "unable to create address_spaces debugfs directory"); } else { for (i = 0; i < kbdev->nr_hw_address_spaces; i++) { - snprintf(as_name, ARRAY_SIZE(as_name), "as%u", i); - debugfs_create_file(as_name, 0444, - debugfs_directory, - (void *)(uintptr_t)i, - &as_fault_fops); + if (likely(scnprintf(as_name, ARRAY_SIZE(as_name), "as%u", i))) + debugfs_create_file(as_name, 0444, debugfs_directory, + (void *)(uintptr_t)i, &as_fault_fops); } } diff --git a/drivers/gpu/arm/bifrost/mali_kbase_core_linux.c b/drivers/gpu/arm/bifrost/mali_kbase_core_linux.c index fa094ab36b1f..7eb6b5a798ce 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_core_linux.c +++ b/drivers/gpu/arm/bifrost/mali_kbase_core_linux.c @@ -31,10 +31,7 @@ #include #endif /* CONFIG_DEVFREQ_THERMAL */ #endif /* CONFIG_MALI_BIFROST_DEVFREQ */ -#if IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI) #include "backend/gpu/mali_kbase_model_linux.h" -#include -#endif /* CONFIG_MALI_BIFROST_NO_MALI */ #include "uapi/gpu/arm/bifrost/mali_kbase_mem_profile_debugfs_buf_size.h" #include 
"mali_kbase_mem.h" #include "mali_kbase_mem_pool_debugfs.h" @@ -632,7 +629,8 @@ static int kbase_file_create_kctx(struct kbase_file *const kfile, kbase_ctx_flag_set(kctx, KCTX_INFINITE_CACHE); #if IS_ENABLED(CONFIG_DEBUG_FS) - snprintf(kctx_name, 64, "%d_%d", kctx->tgid, kctx->id); + if (unlikely(!scnprintf(kctx_name, 64, "%d_%d", kctx->tgid, kctx->id))) + return -ENOMEM; mutex_init(&kctx->mem_profile_lock); @@ -671,8 +669,10 @@ static int kbase_open(struct inode *inode, struct file *filp) if (!kbdev) return -ENODEV; - /* Set address space operation for page migration */ +#if (KERNEL_VERSION(6, 0, 0) > LINUX_VERSION_CODE) + /* Set address space operations for page migration */ kbase_mem_migrate_set_address_space_ops(kbdev, filp); +#endif /* Device-wide firmware load is moved here from probing to comply with * Android GKI vendor guideline. @@ -1467,6 +1467,9 @@ static int kbasep_kcpu_queue_enqueue(struct kbase_context *kctx, static int kbasep_cs_tiler_heap_init(struct kbase_context *kctx, union kbase_ioctl_cs_tiler_heap_init *heap_init) { + if (heap_init->in.group_id >= MEMORY_GROUP_MANAGER_NR_GROUPS) + return -EINVAL; + kctx->jit_group_id = heap_init->in.group_id; return kbase_csf_tiler_heap_init(kctx, heap_init->in.chunk_size, @@ -1479,6 +1482,9 @@ static int kbasep_cs_tiler_heap_init(struct kbase_context *kctx, static int kbasep_cs_tiler_heap_init_1_13(struct kbase_context *kctx, union kbase_ioctl_cs_tiler_heap_init_1_13 *heap_init) { + if (heap_init->in.group_id >= MEMORY_GROUP_MANAGER_NR_GROUPS) + return -EINVAL; + kctx->jit_group_id = heap_init->in.group_id; return kbase_csf_tiler_heap_init(kctx, heap_init->in.chunk_size, @@ -4278,7 +4284,7 @@ void kbase_protected_mode_term(struct kbase_device *kbdev) kfree(kbdev->protected_dev); } -#if IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI) +#if !IS_ENABLED(CONFIG_MALI_REAL_HW) static int kbase_common_reg_map(struct kbase_device *kbdev) { return 0; @@ -4286,7 +4292,7 @@ static int kbase_common_reg_map(struct kbase_device 
*kbdev) static void kbase_common_reg_unmap(struct kbase_device * const kbdev) { } -#else /* CONFIG_MALI_BIFROST_NO_MALI */ +#else /* !IS_ENABLED(CONFIG_MALI_REAL_HW) */ static int kbase_common_reg_map(struct kbase_device *kbdev) { int err = 0; @@ -4322,7 +4328,7 @@ static void kbase_common_reg_unmap(struct kbase_device * const kbdev) kbdev->reg_size = 0; } } -#endif /* CONFIG_MALI_BIFROST_NO_MALI */ +#endif /* !IS_ENABLED(CONFIG_MALI_REAL_HW) */ int registers_map(struct kbase_device * const kbdev) { @@ -4585,8 +4591,18 @@ int power_control_init(struct kbase_device *kbdev) * from completing its initialization. */ #if defined(CONFIG_PM_OPP) -#if ((KERNEL_VERSION(4, 10, 0) <= LINUX_VERSION_CODE) && \ - defined(CONFIG_REGULATOR)) +#if defined(CONFIG_REGULATOR) +#if (KERNEL_VERSION(6, 0, 0) <= LINUX_VERSION_CODE) + if (kbdev->nr_regulators > 0) { + kbdev->token = dev_pm_opp_set_regulators(kbdev->dev, regulator_names); + + if (kbdev->token < 0) { + err = kbdev->token; + goto regulators_probe_defer; + } + + } +#elif (KERNEL_VERSION(4, 10, 0) <= LINUX_VERSION_CODE) if (kbdev->nr_regulators > 0) { kbdev->opp_table = dev_pm_opp_set_regulators(kbdev->dev, regulator_names, @@ -4605,7 +4621,9 @@ int power_control_init(struct kbase_device *kbdev) return 0; } } -#endif /* (KERNEL_VERSION(4, 10, 0) <= LINUX_VERSION_CODE */ +#endif /* (KERNEL_VERSION(6, 0, 0) <= LINUX_VERSION_CODE) */ +#endif /* CONFIG_REGULATOR */ + #ifdef CONFIG_ARCH_ROCKCHIP err = kbase_platform_rk_init_opp_table(kbdev); if (err) @@ -4645,13 +4663,17 @@ void power_control_term(struct kbase_device *kbdev) #if defined(CONFIG_PM_OPP) dev_pm_opp_of_remove_table(kbdev->dev); -#if ((KERNEL_VERSION(4, 10, 0) <= LINUX_VERSION_CODE) && \ - defined(CONFIG_REGULATOR)) - if (!IS_ERR_OR_NULL(kbdev->opp_table)) { +#if defined(CONFIG_REGULATOR) +#if (KERNEL_VERSION(6, 0, 0) <= LINUX_VERSION_CODE) + if (kbdev->token > -EPERM) { dev_pm_opp_unregister_set_opp_helper(kbdev->opp_table); - 
dev_pm_opp_put_regulators(kbdev->opp_table); + dev_pm_opp_put_regulators(kbdev->token); } -#endif /* (KERNEL_VERSION(4, 10, 0) <= LINUX_VERSION_CODE */ +#elif (KERNEL_VERSION(4, 10, 0) <= LINUX_VERSION_CODE) + if (!IS_ERR_OR_NULL(kbdev->opp_table)) + dev_pm_opp_put_regulators(kbdev->opp_table); +#endif /* (KERNEL_VERSION(6, 0, 0) <= LINUX_VERSION_CODE) */ +#endif /* CONFIG_REGULATOR */ #endif /* CONFIG_PM_OPP */ for (i = 0; i < BASE_MAX_NR_CLOCKS_REGULATORS; i++) { @@ -5514,6 +5536,11 @@ static int kbase_platform_device_probe(struct platform_device *pdev) } kbdev->dev = &pdev->dev; + +#if (KERNEL_VERSION(6, 0, 0) <= LINUX_VERSION_CODE) + kbdev->token = -EPERM; +#endif /* (KERNEL_VERSION(6, 0, 0) <= LINUX_VERSION_CODE) */ + dev_set_drvdata(kbdev->dev, kbdev); #if (KERNEL_VERSION(5, 3, 0) <= LINUX_VERSION_CODE) mutex_lock(&kbase_probe_mutex); diff --git a/drivers/gpu/arm/bifrost/mali_kbase_ctx_sched.c b/drivers/gpu/arm/bifrost/mali_kbase_ctx_sched.c index 60afde2ceb7f..beb292862b21 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_ctx_sched.c +++ b/drivers/gpu/arm/bifrost/mali_kbase_ctx_sched.c @@ -69,6 +69,12 @@ void kbase_ctx_sched_term(struct kbase_device *kbdev) } } +void kbase_ctx_sched_init_ctx(struct kbase_context *kctx) +{ + kctx->as_nr = KBASEP_AS_NR_INVALID; + atomic_set(&kctx->refcount, 0); +} + /* kbasep_ctx_sched_find_as_for_ctx - Find a free address space * * @kbdev: The context for which to find a free address space @@ -113,7 +119,7 @@ int kbase_ctx_sched_retain_ctx(struct kbase_context *kctx) if (atomic_inc_return(&kctx->refcount) == 1) { int const free_as = kbasep_ctx_sched_find_as_for_ctx(kctx); - if (free_as != KBASEP_AS_NR_INVALID) { + if (free_as >= 0) { kbdev->as_free &= ~(1u << free_as); /* Only program the MMU if the context has not been * assigned the same address space before. 
@@ -167,8 +173,10 @@ void kbase_ctx_sched_retain_ctx_refcount(struct kbase_context *kctx) */ WARN_ON(!atomic_read(&kctx->refcount)); #endif - WARN_ON(kctx->as_nr == KBASEP_AS_NR_INVALID); - WARN_ON(kbdev->as_to_kctx[kctx->as_nr] != kctx); + if (likely((kctx->as_nr >= 0) && (kctx->as_nr < BASE_MAX_NR_AS))) + WARN_ON(kbdev->as_to_kctx[kctx->as_nr] != kctx); + else + WARN(true, "Invalid as_nr(%d)", kctx->as_nr); atomic_inc(&kctx->refcount); } @@ -182,16 +190,17 @@ void kbase_ctx_sched_release_ctx(struct kbase_context *kctx) new_ref_count = atomic_dec_return(&kctx->refcount); if (new_ref_count == 0) { - kbdev->as_free |= (1u << kctx->as_nr); - if (kbase_ctx_flag(kctx, KCTX_AS_DISABLED_ON_FAULT)) { - KBASE_TLSTREAM_TL_KBASE_CTX_UNASSIGN_AS( - kbdev, kctx->id); - kbdev->as_to_kctx[kctx->as_nr] = NULL; - kctx->as_nr = KBASEP_AS_NR_INVALID; - kbase_ctx_flag_clear(kctx, KCTX_AS_DISABLED_ON_FAULT); + if (likely((kctx->as_nr >= 0) && (kctx->as_nr < BASE_MAX_NR_AS))) { + kbdev->as_free |= (1u << kctx->as_nr); + if (kbase_ctx_flag(kctx, KCTX_AS_DISABLED_ON_FAULT)) { + KBASE_TLSTREAM_TL_KBASE_CTX_UNASSIGN_AS(kbdev, kctx->id); + kbdev->as_to_kctx[kctx->as_nr] = NULL; + kctx->as_nr = KBASEP_AS_NR_INVALID; + kbase_ctx_flag_clear(kctx, KCTX_AS_DISABLED_ON_FAULT); #if !MALI_USE_CSF - kbase_backend_slot_kctx_purge_locked(kbdev, kctx); + kbase_backend_slot_kctx_purge_locked(kbdev, kctx); #endif + } } } @@ -201,13 +210,14 @@ void kbase_ctx_sched_release_ctx(struct kbase_context *kctx) void kbase_ctx_sched_remove_ctx(struct kbase_context *kctx) { struct kbase_device *const kbdev = kctx->kbdev; + unsigned long flags; - lockdep_assert_held(&kbdev->mmu_hw_mutex); - lockdep_assert_held(&kbdev->hwaccess_lock); + mutex_lock(&kbdev->mmu_hw_mutex); + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); WARN_ON(atomic_read(&kctx->refcount) != 0); - if (kctx->as_nr != KBASEP_AS_NR_INVALID) { + if ((kctx->as_nr >= 0) && (kctx->as_nr < BASE_MAX_NR_AS)) { if (kbdev->pm.backend.gpu_powered) 
kbase_mmu_disable(kctx); @@ -215,6 +225,9 @@ void kbase_ctx_sched_remove_ctx(struct kbase_context *kctx) kbdev->as_to_kctx[kctx->as_nr] = NULL; kctx->as_nr = KBASEP_AS_NR_INVALID; } + + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + mutex_unlock(&kbdev->mmu_hw_mutex); } void kbase_ctx_sched_restore_all_as(struct kbase_device *kbdev) diff --git a/drivers/gpu/arm/bifrost/mali_kbase_ctx_sched.h b/drivers/gpu/arm/bifrost/mali_kbase_ctx_sched.h index f787cc34ba48..5a8d17547b7b 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_ctx_sched.h +++ b/drivers/gpu/arm/bifrost/mali_kbase_ctx_sched.h @@ -59,6 +59,15 @@ int kbase_ctx_sched_init(struct kbase_device *kbdev); */ void kbase_ctx_sched_term(struct kbase_device *kbdev); +/** + * kbase_ctx_sched_init_ctx - Initialize per-context data fields for scheduling + * @kctx: The context to initialize + * + * This must be called during context initialization before any other context + * scheduling functions are called on @kctx + */ +void kbase_ctx_sched_init_ctx(struct kbase_context *kctx); + /** * kbase_ctx_sched_retain_ctx - Retain a reference to the @ref kbase_context * @kctx: The context to which to retain a reference @@ -113,9 +122,6 @@ void kbase_ctx_sched_release_ctx(struct kbase_context *kctx); * This function should be called when a context is being destroyed. The * context must no longer have any reference. If it has been assigned an * address space before then the AS will be unprogrammed. - * - * The kbase_device::mmu_hw_mutex and kbase_device::hwaccess_lock locks must be - * held whilst calling this function.
*/ void kbase_ctx_sched_remove_ctx(struct kbase_context *kctx); diff --git a/drivers/gpu/arm/bifrost/mali_kbase_defs.h b/drivers/gpu/arm/bifrost/mali_kbase_defs.h index 15fa0d71387a..80f76145e393 100755 --- a/drivers/gpu/arm/bifrost/mali_kbase_defs.h +++ b/drivers/gpu/arm/bifrost/mali_kbase_defs.h @@ -650,7 +650,6 @@ struct kbase_process { * struct kbase_mem_migrate - Object representing an instance for managing * page migration. * - * @mapping: Pointer to address space struct used for page migration. * @free_pages_list: List of deferred pages to free. Mostly used when page migration * is enabled. Pages in memory pool that require migrating * will be freed instead. However page cannot be freed @@ -661,13 +660,17 @@ struct kbase_process { * @free_pages_workq: Work queue to process the work items queued to free * pages in @free_pages_list. * @free_pages_work: Work item to free pages in @free_pages_list. + * @inode: Pointer to inode whose address space operations are used + * for page migration purposes. */ struct kbase_mem_migrate { - struct address_space *mapping; struct list_head free_pages_list; spinlock_t free_pages_lock; struct workqueue_struct *free_pages_workq; struct work_struct free_pages_work; +#if (KERNEL_VERSION(6, 0, 0) > LINUX_VERSION_CODE) + struct inode *inode; +#endif }; /** @@ -709,6 +712,10 @@ struct kbase_mem_migrate { * @opp_table: Pointer to the device OPP structure maintaining the * link to OPPs attached to a device. This is obtained * after setting regulator names for the device. + * @token: Integer replacement for opp_table in kernel versions + * 6 and greater. Value is a token id number when 0 or greater, + * and a linux errno when negative. Must be initialised + * to a non-zero value as 0 is a valid token id. * @devname: string containing the name used for GPU device instance, * miscellaneous device is registered using the same name.
* @id: Unique identifier for the device, indicates the number of @@ -906,6 +913,10 @@ struct kbase_mem_migrate { * GPU2019-3878. PM state machine is invoked after * clearing this flag and @hwaccess_lock is used to * serialize the access. + * @mmu_page_migrate_in_progress: Set before starting a MMU page migration transaction + * and cleared after the transaction completes. PM L2 state is + * prevented from entering powering up/down transitions when the + * flag is set, @hwaccess_lock is used to serialize the access. * @poweroff_pending: Set when power off operation for GPU is started, reset when * power on for GPU is started. * @infinite_cache_active_default: Set to enable using infinite cache for all the @@ -986,6 +997,10 @@ struct kbase_mem_migrate { * memory handler. * @mem_migrate: Per device object for managing page migration. + * @live_fence_metadata: Count of live fence metadata structures created by + * KCPU queue. These structures may outlive kbase module + * itself. Therefore, in such a case, a warning should + * be produced.
*/ struct kbase_device { u32 hw_quirks_sc; @@ -1010,14 +1025,16 @@ struct kbase_device { #if IS_ENABLED(CONFIG_REGULATOR) struct regulator *regulators[BASE_MAX_NR_CLOCKS_REGULATORS]; unsigned int nr_regulators; -#if (KERNEL_VERSION(4, 10, 0) <= LINUX_VERSION_CODE) +#if (KERNEL_VERSION(6, 0, 0) <= LINUX_VERSION_CODE) + int token; +#elif (KERNEL_VERSION(4, 10, 0) <= LINUX_VERSION_CODE) struct opp_table *opp_table; -#endif /* (KERNEL_VERSION(4, 10, 0) <= LINUX_VERSION_CODE */ +#endif /* (KERNEL_VERSION(6, 0, 0) <= LINUX_VERSION_CODE) */ #endif /* CONFIG_REGULATOR */ char devname[DEVNAME_SIZE]; u32 id; -#if IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI) +#if !IS_ENABLED(CONFIG_MALI_REAL_HW) void *model; struct kmem_cache *irq_slab; struct workqueue_struct *irq_workq; @@ -1025,7 +1042,7 @@ struct kbase_device { atomic_t serving_gpu_irq; atomic_t serving_mmu_irq; spinlock_t reg_op_lock; -#endif /* CONFIG_MALI_BIFROST_NO_MALI */ +#endif /* !IS_ENABLED(CONFIG_MALI_REAL_HW) */ struct kbase_pm_device_data pm; struct kbase_mem_pool_group mem_pools; @@ -1186,6 +1203,7 @@ struct kbase_device { #if MALI_USE_CSF bool mmu_hw_operation_in_progress; #endif + bool mmu_page_migrate_in_progress; bool poweroff_pending; bool infinite_cache_active_default; @@ -1286,6 +1304,10 @@ struct kbase_device { #endif struct kbase_mem_migrate mem_migrate; + +#if MALI_USE_CSF && IS_ENABLED(CONFIG_SYNC_FILE) + atomic_t live_fence_metadata; +#endif }; /** diff --git a/drivers/gpu/arm/bifrost/mali_kbase_fence.h b/drivers/gpu/arm/bifrost/mali_kbase_fence.h index dfe33e52b4ce..25986f604c6c 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_fence.h +++ b/drivers/gpu/arm/bifrost/mali_kbase_fence.h @@ -33,6 +33,49 @@ #include "mali_kbase_fence_defs.h" #include "mali_kbase.h" +#if MALI_USE_CSF +/* Maximum number of characters in DMA fence timeline name. */ +#define MAX_TIMELINE_NAME (32) + +/** + * struct kbase_kcpu_dma_fence_meta - Metadata structure for dma fence objects containing + * information about KCPU queue. 
One instance per KCPU + * queue. + * + * @refcount: Atomic value to keep track of number of references to an instance. + * An instance can outlive the KCPU queue itself. + * @kbdev: Pointer to Kbase device. + * @kctx_id: Kbase context ID. + * @timeline_name: String of timeline name for associated fence object. + */ +struct kbase_kcpu_dma_fence_meta { +#if (KERNEL_VERSION(4, 11, 0) > LINUX_VERSION_CODE) + atomic_t refcount; +#else + refcount_t refcount; +#endif + struct kbase_device *kbdev; + int kctx_id; + char timeline_name[MAX_TIMELINE_NAME]; +}; + +/** + * struct kbase_kcpu_dma_fence - Structure which extends a dma fence object to include a + * reference to metadata containing more information about it. + * + * @base: Fence object itself. + * @metadata: Pointer to metadata structure. + */ +struct kbase_kcpu_dma_fence { +#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) + struct fence base; +#else + struct dma_fence base; +#endif /* LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0) */ + struct kbase_kcpu_dma_fence_meta *metadata; +}; +#endif + #if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) extern const struct fence_ops kbase_fence_ops; #else @@ -167,12 +210,56 @@ static inline int kbase_fence_out_signal(struct kbase_jd_atom *katom, */ #define kbase_fence_get(fence_info) dma_fence_get((fence_info)->fence) +#if MALI_USE_CSF +#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) +static inline struct kbase_kcpu_dma_fence *kbase_kcpu_dma_fence_get(struct fence *fence) +#else +static inline struct kbase_kcpu_dma_fence *kbase_kcpu_dma_fence_get(struct dma_fence *fence) +#endif +{ + if (fence->ops == &kbase_fence_ops) + return (struct kbase_kcpu_dma_fence *)fence; + + return NULL; +} + +static inline void kbase_kcpu_dma_fence_meta_put(struct kbase_kcpu_dma_fence_meta *metadata) +{ +#if (KERNEL_VERSION(4, 11, 0) > LINUX_VERSION_CODE) + if (atomic_dec_and_test(&metadata->refcount)) { +#else + if (refcount_dec_and_test(&metadata->refcount)) { +#endif +
atomic_dec(&metadata->kbdev->live_fence_metadata); + kfree(metadata); + } +} + +#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) +static inline void kbase_kcpu_dma_fence_put(struct fence *fence) +#else +static inline void kbase_kcpu_dma_fence_put(struct dma_fence *fence) +#endif +{ + struct kbase_kcpu_dma_fence *kcpu_fence = kbase_kcpu_dma_fence_get(fence); + + if (kcpu_fence) + kbase_kcpu_dma_fence_meta_put(kcpu_fence->metadata); +} +#endif /* MALI_USE_CSF */ + /** * kbase_fence_put() - Releases a reference to a fence * @fence: Fence to release reference for. */ -#define kbase_fence_put(fence) dma_fence_put(fence) - +#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) +static inline void kbase_fence_put(struct fence *fence) +#else +static inline void kbase_fence_put(struct dma_fence *fence) +#endif +{ + dma_fence_put(fence); +} #endif /* IS_ENABLED(CONFIG_SYNC_FILE) */ diff --git a/drivers/gpu/arm/bifrost/mali_kbase_fence_ops.c b/drivers/gpu/arm/bifrost/mali_kbase_fence_ops.c index be141553c674..25b4c9c03b53 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_fence_ops.c +++ b/drivers/gpu/arm/bifrost/mali_kbase_fence_ops.c @@ -21,7 +21,7 @@ #include #include -#include +#include #include static const char * @@ -41,7 +41,13 @@ kbase_fence_get_timeline_name(struct fence *fence) kbase_fence_get_timeline_name(struct dma_fence *fence) #endif { +#if MALI_USE_CSF + struct kbase_kcpu_dma_fence *kcpu_fence = (struct kbase_kcpu_dma_fence *)fence; + + return kcpu_fence->metadata->timeline_name; +#else return kbase_timeline_name; +#endif /* MALI_USE_CSF */ } static bool @@ -62,24 +68,44 @@ kbase_fence_fence_value_str(struct dma_fence *fence, char *str, int size) #endif { #if (KERNEL_VERSION(5, 1, 0) > LINUX_VERSION_CODE) - snprintf(str, size, "%u", fence->seqno); + const char *format = "%u"; #else - snprintf(str, size, "%llu", fence->seqno); + const char *format = "%llu"; #endif + if (unlikely(!scnprintf(str, size, format, fence->seqno))) + pr_err("Fail to encode fence seqno to 
string"); } +#if MALI_USE_CSF +static void +#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) +kbase_fence_release(struct fence *fence) +#else +kbase_fence_release(struct dma_fence *fence) +#endif +{ + struct kbase_kcpu_dma_fence *kcpu_fence = (struct kbase_kcpu_dma_fence *)fence; + + kbase_kcpu_dma_fence_meta_put(kcpu_fence->metadata); + kfree(kcpu_fence); +} +#endif + #if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) extern const struct fence_ops kbase_fence_ops; /* silence checker warning */ -const struct fence_ops kbase_fence_ops = { - .wait = fence_default_wait, +const struct fence_ops kbase_fence_ops = { .wait = fence_default_wait, #else extern const struct dma_fence_ops kbase_fence_ops; /* silence checker warning */ -const struct dma_fence_ops kbase_fence_ops = { - .wait = dma_fence_default_wait, +const struct dma_fence_ops kbase_fence_ops = { .wait = dma_fence_default_wait, +#endif + .get_driver_name = kbase_fence_get_driver_name, + .get_timeline_name = kbase_fence_get_timeline_name, + .enable_signaling = kbase_fence_enable_signaling, +#if MALI_USE_CSF + .fence_value_str = kbase_fence_fence_value_str, + .release = kbase_fence_release +#else + .fence_value_str = kbase_fence_fence_value_str #endif - .get_driver_name = kbase_fence_get_driver_name, - .get_timeline_name = kbase_fence_get_timeline_name, - .enable_signaling = kbase_fence_enable_signaling, - .fence_value_str = kbase_fence_fence_value_str }; - +KBASE_EXPORT_TEST_API(kbase_fence_ops); diff --git a/drivers/gpu/arm/bifrost/mali_kbase_gpuprops.c b/drivers/gpu/arm/bifrost/mali_kbase_gpuprops.c index 0282aaf8eb3a..7a7d17ea5f26 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_gpuprops.c +++ b/drivers/gpu/arm/bifrost/mali_kbase_gpuprops.c @@ -311,7 +311,6 @@ static void kbase_gpuprops_calculate_props( struct base_gpu_props * const gpu_props, struct kbase_device *kbdev) { int i; - u32 gpu_id; /* Populate the base_gpu_props structure */ kbase_gpuprops_update_core_props_gpu_id(gpu_props); @@ -361,49 +360,23 
@@ static void kbase_gpuprops_calculate_props( gpu_props->thread_props.tls_alloc = gpu_props->raw_props.thread_tls_alloc; - /* MIDHARC-2364 was intended for tULx. - * Workaround for the incorrectly applied THREAD_FEATURES to tDUx. - */ - gpu_id = kbdev->gpu_props.props.raw_props.gpu_id; - #if MALI_USE_CSF - CSTD_UNUSED(gpu_id); gpu_props->thread_props.max_registers = - KBASE_UBFX32(gpu_props->raw_props.thread_features, - 0U, 22); + KBASE_UBFX32(gpu_props->raw_props.thread_features, 0U, 22); gpu_props->thread_props.impl_tech = - KBASE_UBFX32(gpu_props->raw_props.thread_features, - 22U, 2); + KBASE_UBFX32(gpu_props->raw_props.thread_features, 22U, 2); gpu_props->thread_props.max_task_queue = - KBASE_UBFX32(gpu_props->raw_props.thread_features, - 24U, 8); + KBASE_UBFX32(gpu_props->raw_props.thread_features, 24U, 8); gpu_props->thread_props.max_thread_group_split = 0; #else - if ((gpu_id & GPU_ID2_PRODUCT_MODEL) == GPU_ID2_PRODUCT_TDUX) { - gpu_props->thread_props.max_registers = - KBASE_UBFX32(gpu_props->raw_props.thread_features, - 0U, 22); - gpu_props->thread_props.impl_tech = - KBASE_UBFX32(gpu_props->raw_props.thread_features, - 22U, 2); - gpu_props->thread_props.max_task_queue = - KBASE_UBFX32(gpu_props->raw_props.thread_features, - 24U, 8); - gpu_props->thread_props.max_thread_group_split = 0; - } else { - gpu_props->thread_props.max_registers = - KBASE_UBFX32(gpu_props->raw_props.thread_features, - 0U, 16); - gpu_props->thread_props.max_task_queue = - KBASE_UBFX32(gpu_props->raw_props.thread_features, - 16U, 8); - gpu_props->thread_props.max_thread_group_split = - KBASE_UBFX32(gpu_props->raw_props.thread_features, - 24U, 6); - gpu_props->thread_props.impl_tech = - KBASE_UBFX32(gpu_props->raw_props.thread_features, - 30U, 2); - } + gpu_props->thread_props.max_registers = + KBASE_UBFX32(gpu_props->raw_props.thread_features, 0U, 16); + gpu_props->thread_props.max_task_queue = + KBASE_UBFX32(gpu_props->raw_props.thread_features, 16U, 8); + 
gpu_props->thread_props.max_thread_group_split = + KBASE_UBFX32(gpu_props->raw_props.thread_features, 24U, 6); + gpu_props->thread_props.impl_tech = + KBASE_UBFX32(gpu_props->raw_props.thread_features, 30U, 2); #endif /* If values are not specified, then use defaults */ @@ -539,7 +512,7 @@ MODULE_PARM_DESC(override_l2_hash, "Override L2 hash config for testing"); static u32 l2_hash_values[ASN_HASH_COUNT] = { 0, }; -static int num_override_l2_hash_values; +static unsigned int num_override_l2_hash_values; module_param_array(l2_hash_values, uint, &num_override_l2_hash_values, 0000); MODULE_PARM_DESC(l2_hash_values, "Override L2 hash values config for testing"); @@ -593,7 +566,7 @@ kbase_read_l2_config_from_dt(struct kbase_device *const kbdev) kbdev->l2_hash_values_override = false; if (num_override_l2_hash_values) { - int i; + unsigned int i; kbdev->l2_hash_values_override = true; for (i = 0; i < num_override_l2_hash_values; i++) diff --git a/drivers/gpu/arm/bifrost/mali_kbase_hw.c b/drivers/gpu/arm/bifrost/mali_kbase_hw.c index b6a8a2e5608f..c658fb79429b 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_hw.c +++ b/drivers/gpu/arm/bifrost/mali_kbase_hw.c @@ -68,9 +68,6 @@ void kbase_hw_set_features_mask(struct kbase_device *kbdev) case GPU_ID2_PRODUCT_TBAX: features = base_hw_features_tBAx; break; - case GPU_ID2_PRODUCT_TDUX: - features = base_hw_features_tDUx; - break; case GPU_ID2_PRODUCT_TODX: case GPU_ID2_PRODUCT_LODX: features = base_hw_features_tODx; @@ -211,10 +208,6 @@ static const enum base_hw_issue *kbase_hw_get_issues_for_new_id( { GPU_ID2_VERSION_MAKE(0, 0, 2), base_hw_issues_tBAx_r0p0 }, { U32_MAX, NULL } } }, - { GPU_ID2_PRODUCT_TDUX, - { { GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tDUx_r0p0 }, - { U32_MAX, NULL } } }, - { GPU_ID2_PRODUCT_TODX, { { GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tODx_r0p0 }, { GPU_ID2_VERSION_MAKE(0, 0, 4), base_hw_issues_tODx_r0p0 }, @@ -235,6 +228,7 @@ static const enum base_hw_issue *kbase_hw_get_issues_for_new_id( { 
GPU_ID2_PRODUCT_TTUX, { { GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tTUx_r0p0 }, + { GPU_ID2_VERSION_MAKE(0, 1, 0), base_hw_issues_tTUx_r0p1 }, { GPU_ID2_VERSION_MAKE(1, 0, 0), base_hw_issues_tTUx_r1p0 }, { GPU_ID2_VERSION_MAKE(1, 1, 0), base_hw_issues_tTUx_r1p1 }, { GPU_ID2_VERSION_MAKE(1, 2, 0), base_hw_issues_tTUx_r1p2 }, @@ -393,9 +387,6 @@ int kbase_hw_set_issues_mask(struct kbase_device *kbdev) case GPU_ID2_PRODUCT_TBAX: issues = base_hw_issues_model_tBAx; break; - case GPU_ID2_PRODUCT_TDUX: - issues = base_hw_issues_model_tDUx; - break; case GPU_ID2_PRODUCT_TODX: case GPU_ID2_PRODUCT_LODX: issues = base_hw_issues_model_tODx; @@ -414,7 +405,6 @@ int kbase_hw_set_issues_mask(struct kbase_device *kbdev) case GPU_ID2_PRODUCT_LTIX: issues = base_hw_issues_model_tTIx; break; - default: dev_err(kbdev->dev, "Unknown GPU ID %x", gpu_id); diff --git a/drivers/gpu/arm/bifrost/mali_kbase_hwaccess_jm.h b/drivers/gpu/arm/bifrost/mali_kbase_hwaccess_jm.h index 124a6d643e42..ca77c192deea 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_hwaccess_jm.h +++ b/drivers/gpu/arm/bifrost/mali_kbase_hwaccess_jm.h @@ -97,8 +97,8 @@ bool kbase_backend_use_ctx(struct kbase_device *kbdev, * Return: true if context is now active, false otherwise (ie if context does * not have an address space assigned) */ -bool kbase_backend_use_ctx_sched(struct kbase_device *kbdev, - struct kbase_context *kctx, int js); +bool kbase_backend_use_ctx_sched(struct kbase_device *kbdev, struct kbase_context *kctx, + unsigned int js); /** * kbase_backend_release_ctx_irq - Release a context from the GPU. 
This will @@ -183,8 +183,7 @@ void kbase_backend_reset(struct kbase_device *kbdev, ktime_t *end_timestamp); * * Return: Atom currently at the head of slot @js, or NULL */ -struct kbase_jd_atom *kbase_backend_inspect_tail(struct kbase_device *kbdev, - int js); +struct kbase_jd_atom *kbase_backend_inspect_tail(struct kbase_device *kbdev, unsigned int js); /** * kbase_backend_nr_atoms_on_slot() - Return the number of atoms currently on a @@ -194,7 +193,7 @@ struct kbase_jd_atom *kbase_backend_inspect_tail(struct kbase_device *kbdev, * * Return: Number of atoms currently on slot */ -int kbase_backend_nr_atoms_on_slot(struct kbase_device *kbdev, int js); +int kbase_backend_nr_atoms_on_slot(struct kbase_device *kbdev, unsigned int js); /** * kbase_backend_nr_atoms_submitted() - Return the number of atoms on a slot @@ -204,7 +203,7 @@ int kbase_backend_nr_atoms_on_slot(struct kbase_device *kbdev, int js); * * Return: Number of atoms currently on slot @js that are currently on the GPU. */ -int kbase_backend_nr_atoms_submitted(struct kbase_device *kbdev, int js); +int kbase_backend_nr_atoms_submitted(struct kbase_device *kbdev, unsigned int js); /** * kbase_backend_ctx_count_changed() - Number of contexts ready to submit jobs @@ -233,7 +232,7 @@ void kbase_backend_timeouts_changed(struct kbase_device *kbdev); * * Return: Number of jobs that can be submitted. */ -int kbase_backend_slot_free(struct kbase_device *kbdev, int js); +int kbase_backend_slot_free(struct kbase_device *kbdev, unsigned int js); /** * kbase_job_check_leave_disjoint - potentially leave disjoint state @@ -287,8 +286,8 @@ u32 kbase_backend_get_current_flush_id(struct kbase_device *kbdev); * Context: * The job slot lock must be held when calling this function. 
*/ -void kbase_job_slot_hardstop(struct kbase_context *kctx, int js, - struct kbase_jd_atom *target_katom); +void kbase_job_slot_hardstop(struct kbase_context *kctx, unsigned int js, + struct kbase_jd_atom *target_katom); /** * kbase_gpu_atoms_submitted_any() - Inspect whether there are any atoms diff --git a/drivers/gpu/arm/bifrost/mali_kbase_jd.c b/drivers/gpu/arm/bifrost/mali_kbase_jd.c index f5faa92525c5..f44426a736ca 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_jd.c +++ b/drivers/gpu/arm/bifrost/mali_kbase_jd.c @@ -28,6 +28,11 @@ #include #include #include +#if KERNEL_VERSION(4, 11, 0) <= LINUX_VERSION_CODE +#include +#else +#include +#endif #include #include @@ -1074,11 +1079,19 @@ int kbase_jd_submit(struct kbase_context *kctx, return -EINVAL; } + if (nr_atoms > BASE_JD_ATOM_COUNT) { + dev_dbg(kbdev->dev, "Invalid attempt to submit %u atoms at once for kctx %d_%d", + nr_atoms, kctx->tgid, kctx->id); + return -EINVAL; + } + /* All atoms submitted in this call have the same flush ID */ latest_flush = kbase_backend_get_current_flush_id(kbdev); for (i = 0; i < nr_atoms; i++) { - struct base_jd_atom user_atom; + struct base_jd_atom user_atom = { + .seq_nr = 0, + }; struct base_jd_fragment user_jc_incr; struct kbase_jd_atom *katom; @@ -1202,6 +1215,12 @@ while (false) kbase_disjoint_event_potential(kbdev); mutex_unlock(&jctx->lock); + if (fatal_signal_pending(current)) { + dev_dbg(kbdev->dev, "Fatal signal pending for kctx %d_%d", + kctx->tgid, kctx->id); + /* We're being killed so the result code doesn't really matter */ + return 0; + } } if (need_to_try_schedule_context) diff --git a/drivers/gpu/arm/bifrost/mali_kbase_jm.c b/drivers/gpu/arm/bifrost/mali_kbase_jm.c index 6cbd6f1a423e..1ac5cd3eafff 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_jm.c +++ b/drivers/gpu/arm/bifrost/mali_kbase_jm.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2014-2021 ARM Limited. All rights reserved. 
+ * (C) COPYRIGHT 2013-2022 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -37,15 +37,13 @@ * * Return: true if slot can still be submitted on, false if slot is now full. */ -static bool kbase_jm_next_job(struct kbase_device *kbdev, int js, - int nr_jobs_to_submit) +static bool kbase_jm_next_job(struct kbase_device *kbdev, unsigned int js, int nr_jobs_to_submit) { struct kbase_context *kctx; int i; kctx = kbdev->hwaccess.active_kctx[js]; - dev_dbg(kbdev->dev, - "Trying to run the next %d jobs in kctx %pK (s:%d)\n", + dev_dbg(kbdev->dev, "Trying to run the next %d jobs in kctx %pK (s:%u)\n", nr_jobs_to_submit, (void *)kctx, js); if (!kctx) @@ -60,7 +58,7 @@ static bool kbase_jm_next_job(struct kbase_device *kbdev, int js, kbase_backend_run_atom(kbdev, katom); } - dev_dbg(kbdev->dev, "Slot ringbuffer should now be full (s:%d)\n", js); + dev_dbg(kbdev->dev, "Slot ringbuffer should now be full (s:%u)\n", js); return false; } @@ -72,7 +70,7 @@ u32 kbase_jm_kick(struct kbase_device *kbdev, u32 js_mask) dev_dbg(kbdev->dev, "JM kick slot mask 0x%x\n", js_mask); while (js_mask) { - int js = ffs(js_mask) - 1; + unsigned int js = ffs(js_mask) - 1; int nr_jobs_to_submit = kbase_backend_slot_free(kbdev, js); if (kbase_jm_next_job(kbdev, js, nr_jobs_to_submit)) @@ -111,14 +109,14 @@ void kbase_jm_try_kick_all(struct kbase_device *kbdev) void kbase_jm_idle_ctx(struct kbase_device *kbdev, struct kbase_context *kctx) { - int js; + unsigned int js; lockdep_assert_held(&kbdev->hwaccess_lock); for (js = 0; js < BASE_JM_MAX_NR_SLOTS; js++) { if (kbdev->hwaccess.active_kctx[js] == kctx) { - dev_dbg(kbdev->dev, "Marking kctx %pK as inactive (s:%d)\n", - (void *)kctx, js); + dev_dbg(kbdev->dev, "Marking kctx %pK as inactive (s:%u)\n", (void *)kctx, + js); kbdev->hwaccess.active_kctx[js] = NULL; } } diff --git 
a/drivers/gpu/arm/bifrost/mali_kbase_js.c b/drivers/gpu/arm/bifrost/mali_kbase_js.c index a64d7327a76b..78f2d7d47b3b 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_js.c +++ b/drivers/gpu/arm/bifrost/mali_kbase_js.c @@ -77,8 +77,7 @@ static kbasep_js_release_result kbasep_js_runpool_release_ctx_internal( struct kbase_device *kbdev, struct kbase_context *kctx, struct kbasep_js_atom_retained_state *katom_retained_state); -static int kbase_js_get_slot(struct kbase_device *kbdev, - struct kbase_jd_atom *katom); +static unsigned int kbase_js_get_slot(struct kbase_device *kbdev, struct kbase_jd_atom *katom); static void kbase_js_foreach_ctx_job(struct kbase_context *kctx, kbasep_js_ctx_job_cb *callback); @@ -151,8 +150,7 @@ static void kbase_js_sync_timers(struct kbase_device *kbdev) * * Return: true if there are no atoms to pull, false otherwise. */ -static inline bool -jsctx_rb_none_to_pull_prio(struct kbase_context *kctx, int js, int prio) +static inline bool jsctx_rb_none_to_pull_prio(struct kbase_context *kctx, unsigned int js, int prio) { bool none_to_pull; struct jsctx_queue *rb = &kctx->jsctx_queue[prio][js]; @@ -161,9 +159,8 @@ jsctx_rb_none_to_pull_prio(struct kbase_context *kctx, int js, int prio) none_to_pull = RB_EMPTY_ROOT(&rb->runnable_tree); - dev_dbg(kctx->kbdev->dev, - "Slot %d (prio %d) is %spullable in kctx %pK\n", - js, prio, none_to_pull ? "not " : "", kctx); + dev_dbg(kctx->kbdev->dev, "Slot %u (prio %d) is %spullable in kctx %pK\n", js, prio, + none_to_pull ? "not " : "", kctx); return none_to_pull; } @@ -179,8 +176,7 @@ jsctx_rb_none_to_pull_prio(struct kbase_context *kctx, int js, int prio) * Return: true if the ring buffers for all priorities have no pullable atoms, * false otherwise. 
*/ -static inline bool -jsctx_rb_none_to_pull(struct kbase_context *kctx, int js) +static inline bool jsctx_rb_none_to_pull(struct kbase_context *kctx, unsigned int js) { int prio; @@ -212,8 +208,8 @@ jsctx_rb_none_to_pull(struct kbase_context *kctx, int js) * * The HW access lock must always be held when calling this function. */ -static void jsctx_queue_foreach_prio(struct kbase_context *kctx, int js, - int prio, kbasep_js_ctx_job_cb *callback) +static void jsctx_queue_foreach_prio(struct kbase_context *kctx, unsigned int js, int prio, + kbasep_js_ctx_job_cb *callback) { struct jsctx_queue *queue = &kctx->jsctx_queue[prio][js]; @@ -272,7 +268,7 @@ static void jsctx_queue_foreach_prio(struct kbase_context *kctx, int js, * jsctx_queue_foreach_prio() to iterate over the queue and invoke @callback * for each entry, and remove the entry from the queue. */ -static inline void jsctx_queue_foreach(struct kbase_context *kctx, int js, +static inline void jsctx_queue_foreach(struct kbase_context *kctx, unsigned int js, kbasep_js_ctx_job_cb *callback) { int prio; @@ -293,15 +289,14 @@ static inline void jsctx_queue_foreach(struct kbase_context *kctx, int js, * * Return: Pointer to next atom in buffer, or NULL if there is no atom. */ -static inline struct kbase_jd_atom * -jsctx_rb_peek_prio(struct kbase_context *kctx, int js, int prio) +static inline struct kbase_jd_atom *jsctx_rb_peek_prio(struct kbase_context *kctx, unsigned int js, + int prio) { struct jsctx_queue *rb = &kctx->jsctx_queue[prio][js]; struct rb_node *node; lockdep_assert_held(&kctx->kbdev->hwaccess_lock); - dev_dbg(kctx->kbdev->dev, - "Peeking runnable tree of kctx %pK for prio %d (s:%d)\n", + dev_dbg(kctx->kbdev->dev, "Peeking runnable tree of kctx %pK for prio %d (s:%u)\n", (void *)kctx, prio, js); node = rb_first(&rb->runnable_tree); @@ -326,8 +321,7 @@ jsctx_rb_peek_prio(struct kbase_context *kctx, int js, int prio) * * Return: Pointer to next atom in buffer, or NULL if there is no atom. 
*/ -static inline struct kbase_jd_atom * -jsctx_rb_peek(struct kbase_context *kctx, int js) +static inline struct kbase_jd_atom *jsctx_rb_peek(struct kbase_context *kctx, unsigned int js) { int prio; @@ -358,7 +352,7 @@ static inline void jsctx_rb_pull(struct kbase_context *kctx, struct kbase_jd_atom *katom) { int prio = katom->sched_priority; - int js = katom->slot_nr; + unsigned int js = katom->slot_nr; struct jsctx_queue *rb = &kctx->jsctx_queue[prio][js]; lockdep_assert_held(&kctx->kbdev->hwaccess_lock); @@ -377,14 +371,14 @@ jsctx_tree_add(struct kbase_context *kctx, struct kbase_jd_atom *katom) { struct kbase_device *kbdev = kctx->kbdev; int prio = katom->sched_priority; - int js = katom->slot_nr; + unsigned int js = katom->slot_nr; struct jsctx_queue *queue = &kctx->jsctx_queue[prio][js]; struct rb_node **new = &(queue->runnable_tree.rb_node), *parent = NULL; lockdep_assert_held(&kctx->kbdev->hwaccess_lock); - dev_dbg(kbdev->dev, "Adding atom %pK to runnable tree of kctx %pK (s:%d)\n", - (void *)katom, (void *)kctx, js); + dev_dbg(kbdev->dev, "Adding atom %pK to runnable tree of kctx %pK (s:%u)\n", (void *)katom, + (void *)kctx, js); while (*new) { struct kbase_jd_atom *entry = container_of(*new, @@ -425,15 +419,11 @@ jsctx_rb_unpull(struct kbase_context *kctx, struct kbase_jd_atom *katom) jsctx_tree_add(kctx, katom); } -static bool kbase_js_ctx_pullable(struct kbase_context *kctx, - int js, - bool is_scheduled); +static bool kbase_js_ctx_pullable(struct kbase_context *kctx, unsigned int js, bool is_scheduled); static bool kbase_js_ctx_list_add_pullable_nolock(struct kbase_device *kbdev, - struct kbase_context *kctx, - int js); + struct kbase_context *kctx, unsigned int js); static bool kbase_js_ctx_list_add_unpullable_nolock(struct kbase_device *kbdev, - struct kbase_context *kctx, - int js); + struct kbase_context *kctx, unsigned int js); typedef bool(katom_ordering_func)(const struct kbase_jd_atom *, const struct kbase_jd_atom *); @@ -645,6 +635,8 @@ int 
kbasep_js_kctx_init(struct kbase_context *const kctx) KBASE_DEBUG_ASSERT(kctx != NULL); + kbase_ctx_sched_init_ctx(kctx); + for (i = 0; i < BASE_JM_MAX_NR_SLOTS; ++i) INIT_LIST_HEAD(&kctx->jctx.sched_info.ctx.ctx_list_entry[i]); @@ -683,7 +675,7 @@ void kbasep_js_kctx_term(struct kbase_context *kctx) { struct kbase_device *kbdev; struct kbasep_js_kctx_info *js_kctx_info; - int js; + unsigned int js; bool update_ctx_count = false; unsigned long flags; CSTD_UNUSED(js_kctx_info); @@ -722,6 +714,8 @@ void kbasep_js_kctx_term(struct kbase_context *kctx) kbase_backend_ctx_count_changed(kbdev); mutex_unlock(&kbdev->js_data.runpool_mutex); } + + kbase_ctx_sched_remove_ctx(kctx); } /* @@ -729,8 +723,8 @@ void kbasep_js_kctx_term(struct kbase_context *kctx) */ /* Should not normally use directly - use kbase_jsctx_slot_atom_pulled_dec() instead */ -static void kbase_jsctx_slot_prio_blocked_clear(struct kbase_context *kctx, - int js, int sched_prio) +static void kbase_jsctx_slot_prio_blocked_clear(struct kbase_context *kctx, unsigned int js, + int sched_prio) { struct kbase_jsctx_slot_tracking *slot_tracking = &kctx->slot_tracking[js]; @@ -742,7 +736,7 @@ static void kbase_jsctx_slot_prio_blocked_clear(struct kbase_context *kctx, NULL, 0, js, (unsigned int)sched_prio); } -static int kbase_jsctx_slot_atoms_pulled(struct kbase_context *kctx, int js) +static int kbase_jsctx_slot_atoms_pulled(struct kbase_context *kctx, unsigned int js) { return atomic_read(&kctx->slot_tracking[js].atoms_pulled); } @@ -752,7 +746,7 @@ static int kbase_jsctx_slot_atoms_pulled(struct kbase_context *kctx, int js) * - that priority level is blocked * - or, any higher priority level is blocked */ -static bool kbase_jsctx_slot_prio_is_blocked(struct kbase_context *kctx, int js, +static bool kbase_jsctx_slot_prio_is_blocked(struct kbase_context *kctx, unsigned int js, int sched_prio) { struct kbase_jsctx_slot_tracking *slot_tracking = @@ -792,7 +786,7 @@ static bool 
kbase_jsctx_slot_prio_is_blocked(struct kbase_context *kctx, int js, static int kbase_jsctx_slot_atom_pulled_inc(struct kbase_context *kctx, const struct kbase_jd_atom *katom) { - int js = katom->slot_nr; + unsigned int js = katom->slot_nr; int sched_prio = katom->sched_priority; struct kbase_jsctx_slot_tracking *slot_tracking = &kctx->slot_tracking[js]; @@ -801,7 +795,7 @@ static int kbase_jsctx_slot_atom_pulled_inc(struct kbase_context *kctx, lockdep_assert_held(&kctx->kbdev->hwaccess_lock); WARN(kbase_jsctx_slot_prio_is_blocked(kctx, js, sched_prio), - "Should not have pulled atoms for slot %d from a context that is blocked at priority %d or higher", + "Should not have pulled atoms for slot %u from a context that is blocked at priority %d or higher", js, sched_prio); nr_atoms_pulled = atomic_inc_return(&kctx->atoms_pulled_all_slots); @@ -830,7 +824,7 @@ static int kbase_jsctx_slot_atom_pulled_inc(struct kbase_context *kctx, static bool kbase_jsctx_slot_atom_pulled_dec(struct kbase_context *kctx, const struct kbase_jd_atom *katom) { - int js = katom->slot_nr; + unsigned int js = katom->slot_nr; int sched_prio = katom->sched_priority; int atoms_pulled_pri; struct kbase_jsctx_slot_tracking *slot_tracking = @@ -879,14 +873,12 @@ static bool kbase_jsctx_slot_atom_pulled_dec(struct kbase_context *kctx, * Return: true if caller should call kbase_backend_ctx_count_changed() */ static bool kbase_js_ctx_list_add_pullable_nolock(struct kbase_device *kbdev, - struct kbase_context *kctx, - int js) + struct kbase_context *kctx, unsigned int js) { bool ret = false; lockdep_assert_held(&kbdev->hwaccess_lock); - dev_dbg(kbdev->dev, "Add pullable tail kctx %pK (s:%d)\n", - (void *)kctx, js); + dev_dbg(kbdev->dev, "Add pullable tail kctx %pK (s:%u)\n", (void *)kctx, js); if (!list_empty(&kctx->jctx.sched_info.ctx.ctx_list_entry[js])) list_del_init(&kctx->jctx.sched_info.ctx.ctx_list_entry[js]); @@ -921,14 +913,13 @@ static bool kbase_js_ctx_list_add_pullable_nolock(struct 
kbase_device *kbdev, * * Return: true if caller should call kbase_backend_ctx_count_changed() */ -static bool kbase_js_ctx_list_add_pullable_head_nolock( - struct kbase_device *kbdev, struct kbase_context *kctx, int js) +static bool kbase_js_ctx_list_add_pullable_head_nolock(struct kbase_device *kbdev, + struct kbase_context *kctx, unsigned int js) { bool ret = false; lockdep_assert_held(&kbdev->hwaccess_lock); - dev_dbg(kbdev->dev, "Add pullable head kctx %pK (s:%d)\n", - (void *)kctx, js); + dev_dbg(kbdev->dev, "Add pullable head kctx %pK (s:%u)\n", (void *)kctx, js); if (!list_empty(&kctx->jctx.sched_info.ctx.ctx_list_entry[js])) list_del_init(&kctx->jctx.sched_info.ctx.ctx_list_entry[js]); @@ -966,8 +957,7 @@ static bool kbase_js_ctx_list_add_pullable_head_nolock( * Return: true if caller should call kbase_backend_ctx_count_changed() */ static bool kbase_js_ctx_list_add_pullable_head(struct kbase_device *kbdev, - struct kbase_context *kctx, - int js) + struct kbase_context *kctx, unsigned int js) { bool ret; unsigned long flags; @@ -997,14 +987,12 @@ static bool kbase_js_ctx_list_add_pullable_head(struct kbase_device *kbdev, * Return: true if caller should call kbase_backend_ctx_count_changed() */ static bool kbase_js_ctx_list_add_unpullable_nolock(struct kbase_device *kbdev, - struct kbase_context *kctx, - int js) + struct kbase_context *kctx, unsigned int js) { bool ret = false; lockdep_assert_held(&kbdev->hwaccess_lock); - dev_dbg(kbdev->dev, "Add unpullable tail kctx %pK (s:%d)\n", - (void *)kctx, js); + dev_dbg(kbdev->dev, "Add unpullable tail kctx %pK (s:%u)\n", (void *)kctx, js); list_move_tail(&kctx->jctx.sched_info.ctx.ctx_list_entry[js], &kbdev->js_data.ctx_list_unpullable[js][kctx->priority]); @@ -1039,9 +1027,8 @@ static bool kbase_js_ctx_list_add_unpullable_nolock(struct kbase_device *kbdev, * * Return: true if caller should call kbase_backend_ctx_count_changed() */ -static bool kbase_js_ctx_list_remove_nolock(struct kbase_device *kbdev, - struct 
kbase_context *kctx, - int js) +static bool kbase_js_ctx_list_remove_nolock(struct kbase_device *kbdev, struct kbase_context *kctx, + unsigned int js) { bool ret = false; @@ -1077,9 +1064,8 @@ static bool kbase_js_ctx_list_remove_nolock(struct kbase_device *kbdev, * Return: Context to use for specified slot. * NULL if no contexts present for specified slot */ -static struct kbase_context *kbase_js_ctx_list_pop_head_nolock( - struct kbase_device *kbdev, - int js) +static struct kbase_context *kbase_js_ctx_list_pop_head_nolock(struct kbase_device *kbdev, + unsigned int js) { struct kbase_context *kctx; int i; @@ -1095,9 +1081,8 @@ static struct kbase_context *kbase_js_ctx_list_pop_head_nolock( jctx.sched_info.ctx.ctx_list_entry[js]); list_del_init(&kctx->jctx.sched_info.ctx.ctx_list_entry[js]); - dev_dbg(kbdev->dev, - "Popped %pK from the pullable queue (s:%d)\n", - (void *)kctx, js); + dev_dbg(kbdev->dev, "Popped %pK from the pullable queue (s:%u)\n", (void *)kctx, + js); return kctx; } return NULL; @@ -1112,8 +1097,7 @@ static struct kbase_context *kbase_js_ctx_list_pop_head_nolock( * Return: Context to use for specified slot. 
* NULL if no contexts present for specified slot */ -static struct kbase_context *kbase_js_ctx_list_pop_head( - struct kbase_device *kbdev, int js) +static struct kbase_context *kbase_js_ctx_list_pop_head(struct kbase_device *kbdev, unsigned int js) { struct kbase_context *kctx; unsigned long flags; @@ -1137,8 +1121,7 @@ static struct kbase_context *kbase_js_ctx_list_pop_head( * Return: true if context can be pulled from on specified slot * false otherwise */ -static bool kbase_js_ctx_pullable(struct kbase_context *kctx, int js, - bool is_scheduled) +static bool kbase_js_ctx_pullable(struct kbase_context *kctx, unsigned int js, bool is_scheduled) { struct kbasep_js_device_data *js_devdata; struct kbase_jd_atom *katom; @@ -1157,8 +1140,7 @@ static bool kbase_js_ctx_pullable(struct kbase_context *kctx, int js, } katom = jsctx_rb_peek(kctx, js); if (!katom) { - dev_dbg(kbdev->dev, "JS: No pullable atom in kctx %pK (s:%d)\n", - (void *)kctx, js); + dev_dbg(kbdev->dev, "JS: No pullable atom in kctx %pK (s:%u)\n", (void *)kctx, js); return false; /* No pullable atoms */ } if (kbase_jsctx_slot_prio_is_blocked(kctx, js, katom->sched_priority)) { @@ -1166,7 +1148,7 @@ static bool kbase_js_ctx_pullable(struct kbase_context *kctx, int js, kctx->kbdev, JS_SLOT_PRIO_IS_BLOCKED, kctx, katom, katom->jc, js, (unsigned int)katom->sched_priority); dev_dbg(kbdev->dev, - "JS: kctx %pK is blocked from submitting atoms at priority %d and lower (s:%d)\n", + "JS: kctx %pK is blocked from submitting atoms at priority %d and lower (s:%u)\n", (void *)kctx, katom->sched_priority, js); return false; } @@ -1187,14 +1169,14 @@ static bool kbase_js_ctx_pullable(struct kbase_context *kctx, int js, if ((katom->atom_flags & KBASE_KATOM_FLAG_FAIL_BLOCKER) && kbase_backend_nr_atoms_on_slot(kctx->kbdev, js)) { dev_dbg(kbdev->dev, - "JS: Atom %pK has cross-slot fail dependency and atoms on slot (s:%d)\n", + "JS: Atom %pK has cross-slot fail dependency and atoms on slot (s:%u)\n", (void *)katom, js); 
return false; } } - dev_dbg(kbdev->dev, "JS: Atom %pK is pullable in kctx %pK (s:%d)\n", - (void *)katom, (void *)kctx, js); + dev_dbg(kbdev->dev, "JS: Atom %pK is pullable in kctx %pK (s:%u)\n", (void *)katom, + (void *)kctx, js); return true; } @@ -1205,7 +1187,7 @@ static bool kbase_js_dep_validate(struct kbase_context *kctx, struct kbase_device *kbdev = kctx->kbdev; bool ret = true; bool has_dep = false, has_x_dep = false; - int js = kbase_js_get_slot(kbdev, katom); + unsigned int js = kbase_js_get_slot(kbdev, katom); int prio = katom->sched_priority; int i; @@ -1213,7 +1195,7 @@ static bool kbase_js_dep_validate(struct kbase_context *kctx, struct kbase_jd_atom *dep_atom = katom->dep[i].atom; if (dep_atom) { - int dep_js = kbase_js_get_slot(kbdev, dep_atom); + unsigned int dep_js = kbase_js_get_slot(kbdev, dep_atom); int dep_prio = dep_atom->sched_priority; dev_dbg(kbdev->dev, @@ -1368,7 +1350,7 @@ static bool kbase_js_dep_validate(struct kbase_context *kctx, void kbase_js_set_ctx_priority(struct kbase_context *kctx, int new_priority) { struct kbase_device *kbdev = kctx->kbdev; - int js; + unsigned int js; lockdep_assert_held(&kbdev->hwaccess_lock); @@ -2074,9 +2056,8 @@ void kbase_js_set_timeouts(struct kbase_device *kbdev) kbase_backend_timeouts_changed(kbdev); } -static bool kbasep_js_schedule_ctx(struct kbase_device *kbdev, - struct kbase_context *kctx, - int js) +static bool kbasep_js_schedule_ctx(struct kbase_device *kbdev, struct kbase_context *kctx, + unsigned int js) { struct kbasep_js_device_data *js_devdata; struct kbasep_js_kctx_info *js_kctx_info; @@ -2084,7 +2065,7 @@ static bool kbasep_js_schedule_ctx(struct kbase_device *kbdev, bool kctx_suspended = false; int as_nr; - dev_dbg(kbdev->dev, "Scheduling kctx %pK (s:%d)\n", kctx, js); + dev_dbg(kbdev->dev, "Scheduling kctx %pK (s:%u)\n", kctx, js); js_devdata = &kbdev->js_data; js_kctx_info = &kctx->jctx.sched_info; @@ -2111,8 +2092,8 @@ static bool kbasep_js_schedule_ctx(struct kbase_device *kbdev, 
WARN_ON(as_nr == KBASEP_AS_NR_INVALID); } } - if (as_nr == KBASEP_AS_NR_INVALID) - return false; /* No address spaces currently available */ + if ((as_nr < 0) || (as_nr >= BASE_MAX_NR_AS)) + return false; /* No address space currently available */ /* * Atomic transaction on the Context and Run Pool begins @@ -2219,9 +2200,8 @@ static bool kbasep_js_schedule_ctx(struct kbase_device *kbdev, return true; } -static bool kbase_js_use_ctx(struct kbase_device *kbdev, - struct kbase_context *kctx, - int js) +static bool kbase_js_use_ctx(struct kbase_device *kbdev, struct kbase_context *kctx, + unsigned int js) { unsigned long flags; @@ -2229,9 +2209,7 @@ static bool kbase_js_use_ctx(struct kbase_device *kbdev, if (kbase_ctx_flag(kctx, KCTX_SCHEDULED) && kbase_backend_use_ctx_sched(kbdev, kctx, js)) { - - dev_dbg(kbdev->dev, - "kctx %pK already has ASID - mark as active (s:%d)\n", + dev_dbg(kbdev->dev, "kctx %pK already has ASID - mark as active (s:%u)\n", (void *)kctx, js); if (kbdev->hwaccess.active_kctx[js] != kctx) { @@ -2498,8 +2476,7 @@ bool kbase_js_is_atom_valid(struct kbase_device *kbdev, return true; } -static int kbase_js_get_slot(struct kbase_device *kbdev, - struct kbase_jd_atom *katom) +static unsigned int kbase_js_get_slot(struct kbase_device *kbdev, struct kbase_jd_atom *katom) { if (katom->core_req & BASE_JD_REQ_JOB_SLOT) return katom->jobslot; @@ -2538,11 +2515,10 @@ bool kbase_js_dep_resolved_submit(struct kbase_context *kctx, (katom->pre_dep && (katom->pre_dep->atom_flags & KBASE_KATOM_FLAG_JSCTX_IN_X_DEP_LIST))) { int prio = katom->sched_priority; - int js = katom->slot_nr; + unsigned int js = katom->slot_nr; struct jsctx_queue *queue = &kctx->jsctx_queue[prio][js]; - dev_dbg(kctx->kbdev->dev, "Add atom %pK to X_DEP list (s:%d)\n", - (void *)katom, js); + dev_dbg(kctx->kbdev->dev, "Add atom %pK to X_DEP list (s:%u)\n", (void *)katom, js); list_add_tail(&katom->queue, &queue->x_dep_head); katom->atom_flags |= KBASE_KATOM_FLAG_JSCTX_IN_X_DEP_LIST; @@ 
-2633,8 +2609,8 @@ static void kbase_js_move_to_tree(struct kbase_jd_atom *katom) * * Context: Caller must hold the HW access lock */ -static void kbase_js_evict_deps(struct kbase_context *kctx, - struct kbase_jd_atom *katom, int js, int prio) +static void kbase_js_evict_deps(struct kbase_context *kctx, struct kbase_jd_atom *katom, + unsigned int js, int prio) { struct kbase_jd_atom *x_dep = katom->x_post_dep; struct kbase_jd_atom *next_katom = katom->post_dep; @@ -2666,7 +2642,7 @@ static void kbase_js_evict_deps(struct kbase_context *kctx, } } -struct kbase_jd_atom *kbase_js_pull(struct kbase_context *kctx, int js) +struct kbase_jd_atom *kbase_js_pull(struct kbase_context *kctx, unsigned int js) { struct kbase_jd_atom *katom; struct kbasep_js_device_data *js_devdata; @@ -2676,8 +2652,7 @@ struct kbase_jd_atom *kbase_js_pull(struct kbase_context *kctx, int js) KBASE_DEBUG_ASSERT(kctx); kbdev = kctx->kbdev; - dev_dbg(kbdev->dev, "JS: pulling an atom from kctx %pK (s:%d)\n", - (void *)kctx, js); + dev_dbg(kbdev->dev, "JS: pulling an atom from kctx %pK (s:%u)\n", (void *)kctx, js); js_devdata = &kbdev->js_data; lockdep_assert_held(&kbdev->hwaccess_lock); @@ -2696,13 +2671,12 @@ struct kbase_jd_atom *kbase_js_pull(struct kbase_context *kctx, int js) katom = jsctx_rb_peek(kctx, js); if (!katom) { - dev_dbg(kbdev->dev, "JS: No pullable atom in kctx %pK (s:%d)\n", - (void *)kctx, js); + dev_dbg(kbdev->dev, "JS: No pullable atom in kctx %pK (s:%u)\n", (void *)kctx, js); return NULL; } if (kbase_jsctx_slot_prio_is_blocked(kctx, js, katom->sched_priority)) { dev_dbg(kbdev->dev, - "JS: kctx %pK is blocked from submitting atoms at priority %d and lower (s:%d)\n", + "JS: kctx %pK is blocked from submitting atoms at priority %d and lower (s:%u)\n", (void *)kctx, katom->sched_priority, js); return NULL; } @@ -2736,7 +2710,7 @@ struct kbase_jd_atom *kbase_js_pull(struct kbase_context *kctx, int js) if ((katom->atom_flags & KBASE_KATOM_FLAG_FAIL_BLOCKER) && 
kbase_backend_nr_atoms_on_slot(kbdev, js)) { dev_dbg(kbdev->dev, - "JS: Atom %pK has cross-slot fail dependency and atoms on slot (s:%d)\n", + "JS: Atom %pK has cross-slot fail dependency and atoms on slot (s:%u)\n", (void *)katom, js); return NULL; } @@ -2759,7 +2733,7 @@ struct kbase_jd_atom *kbase_js_pull(struct kbase_context *kctx, int js) katom->ticks = 0; - dev_dbg(kbdev->dev, "JS: successfully pulled atom %pK from kctx %pK (s:%d)\n", + dev_dbg(kbdev->dev, "JS: successfully pulled atom %pK from kctx %pK (s:%u)\n", (void *)katom, (void *)kctx, js); return katom; @@ -3362,7 +3336,7 @@ bool kbase_js_complete_atom_wq(struct kbase_context *kctx, if (!kbasep_js_is_submit_allowed(js_devdata, kctx) && !kbase_jsctx_atoms_pulled(kctx) && !kbase_ctx_flag(kctx, KCTX_DYING)) { - int js; + unsigned int js; kbasep_js_set_submit_allowed(js_devdata, kctx); @@ -3374,7 +3348,7 @@ bool kbase_js_complete_atom_wq(struct kbase_context *kctx, } } else if (katom->x_post_dep && kbasep_js_is_submit_allowed(js_devdata, kctx)) { - int js; + unsigned int js; for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) { if (kbase_js_ctx_pullable(kctx, js, true)) @@ -3604,13 +3578,13 @@ bool kbase_js_atom_blocked_on_x_dep(struct kbase_jd_atom *const katom) return false; } -void kbase_js_sched(struct kbase_device *kbdev, int js_mask) +void kbase_js_sched(struct kbase_device *kbdev, unsigned int js_mask) { struct kbasep_js_device_data *js_devdata; struct kbase_context *last_active[BASE_JM_MAX_NR_SLOTS]; bool timer_sync = false; bool ctx_waiting[BASE_JM_MAX_NR_SLOTS]; - int js; + unsigned int js; KBASE_TLSTREAM_TL_JS_SCHED_START(kbdev, 0); @@ -3639,24 +3613,20 @@ void kbase_js_sched(struct kbase_device *kbdev, int js_mask) if (!kctx) { js_mask &= ~(1 << js); - dev_dbg(kbdev->dev, - "No kctx on pullable list (s:%d)\n", - js); + dev_dbg(kbdev->dev, "No kctx on pullable list (s:%u)\n", js); break; } if (!kbase_ctx_flag(kctx, KCTX_ACTIVE)) { context_idle = true; - dev_dbg(kbdev->dev, - "kctx %pK is 
not active (s:%d)\n", - (void *)kctx, js); + dev_dbg(kbdev->dev, "kctx %pK is not active (s:%u)\n", (void *)kctx, + js); if (kbase_pm_context_active_handle_suspend( kbdev, KBASE_PM_SUSPEND_HANDLER_DONT_INCREASE)) { - dev_dbg(kbdev->dev, - "Suspend pending (s:%d)\n", js); + dev_dbg(kbdev->dev, "Suspend pending (s:%u)\n", js); /* Suspend pending - return context to * queue and stop scheduling */ @@ -3714,16 +3684,13 @@ void kbase_js_sched(struct kbase_device *kbdev, int js_mask) kbase_ctx_flag_clear(kctx, KCTX_PULLED); if (!kbase_jm_kick(kbdev, 1 << js)) { - dev_dbg(kbdev->dev, - "No more jobs can be submitted (s:%d)\n", - js); + dev_dbg(kbdev->dev, "No more jobs can be submitted (s:%u)\n", js); js_mask &= ~(1 << js); } if (!kbase_ctx_flag(kctx, KCTX_PULLED)) { bool pullable; - dev_dbg(kbdev->dev, - "No atoms pulled from kctx %pK (s:%d)\n", + dev_dbg(kbdev->dev, "No atoms pulled from kctx %pK (s:%u)\n", (void *)kctx, js); pullable = kbase_js_ctx_pullable(kctx, js, @@ -3807,8 +3774,8 @@ void kbase_js_sched(struct kbase_device *kbdev, int js_mask) for (js = 0; js < BASE_JM_MAX_NR_SLOTS; js++) { if (kbdev->hwaccess.active_kctx[js] == last_active[js] && ctx_waiting[js]) { - dev_dbg(kbdev->dev, "Marking kctx %pK as inactive (s:%d)\n", - (void *)last_active[js], js); + dev_dbg(kbdev->dev, "Marking kctx %pK as inactive (s:%u)\n", + (void *)last_active[js], js); kbdev->hwaccess.active_kctx[js] = NULL; } } @@ -3879,7 +3846,7 @@ void kbase_js_zap_context(struct kbase_context *kctx) */ if (!kbase_ctx_flag(kctx, KCTX_SCHEDULED)) { unsigned long flags; - int js; + unsigned int js; spin_lock_irqsave(&kbdev->hwaccess_lock, flags); for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) { @@ -4003,7 +3970,7 @@ static void kbase_js_foreach_ctx_job(struct kbase_context *kctx, { struct kbase_device *kbdev; unsigned long flags; - u32 js; + unsigned int js; kbdev = kctx->kbdev; @@ -4035,4 +4002,3 @@ base_jd_prio kbase_js_priority_check(struct kbase_device *kbdev, base_jd_prio pr 
req_priority); return kbasep_js_sched_prio_to_atom_prio(kbdev, out_priority); } - diff --git a/drivers/gpu/arm/bifrost/mali_kbase_kinstr_jm.c b/drivers/gpu/arm/bifrost/mali_kbase_kinstr_jm.c index 7b8961679a10..14a730dc5a12 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_kinstr_jm.c +++ b/drivers/gpu/arm/bifrost/mali_kbase_kinstr_jm.c @@ -48,6 +48,11 @@ #include #include +/* Explicitly include epoll header for old kernels. Not required from 4.16. */ +#if KERNEL_VERSION(4, 16, 0) > LINUX_VERSION_CODE +#include +#endif + /* Define static_assert(). * * The macro was introduced in kernel 5.1. But older vendor kernels may define diff --git a/drivers/gpu/arm/bifrost/mali_kbase_kinstr_prfcnt.c b/drivers/gpu/arm/bifrost/mali_kbase_kinstr_prfcnt.c index 5fb11b7b94c5..823f9156e19e 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_kinstr_prfcnt.c +++ b/drivers/gpu/arm/bifrost/mali_kbase_kinstr_prfcnt.c @@ -36,9 +36,15 @@ #include #include #include +#include #include #include +/* Explicitly include epoll header for old kernels. Not required from 4.16. */ +#if KERNEL_VERSION(4, 16, 0) > LINUX_VERSION_CODE +#include +#endif + /* The minimum allowed interval between dumps, in nanoseconds * (equivalent to 10KHz) */ @@ -115,16 +121,31 @@ struct kbase_kinstr_prfcnt_client_config { }; /** - * struct kbase_kinstr_prfcnt_async - Asynchronous sampling operation to - * carry out for a kinstr_prfcnt_client. - * @dump_work: Worker for performing asynchronous counter dumps. - * @user_data: User data for asynchronous dump in progress. - * @ts_end_ns: End timestamp of most recent async dump. + * enum kbase_kinstr_prfcnt_client_init_state - A list of + * initialisation states that the + * kinstr_prfcnt client can be at + * during initialisation. Useful + * for terminating a partially + * initialised client. 
+ * + * @KINSTR_PRFCNT_UNINITIALISED : Client is uninitialised + * @KINSTR_PRFCNT_PARSE_SETUP : Parse the setup session + * @KINSTR_PRFCNT_ENABLE_MAP : Allocate memory for enable map + * @KINSTR_PRFCNT_DUMP_BUFFER : Allocate memory for dump buffer + * @KINSTR_PRFCNT_SAMPLE_ARRAY : Allocate memory for and initialise sample array + * @KINSTR_PRFCNT_VIRTUALIZER_CLIENT : Create virtualizer client + * @KINSTR_PRFCNT_WAITQ_MUTEX : Create and initialise mutex and waitqueue + * @KINSTR_PRFCNT_INITIALISED : Client is fully initialised */ -struct kbase_kinstr_prfcnt_async { - struct work_struct dump_work; - u64 user_data; - u64 ts_end_ns; +enum kbase_kinstr_prfcnt_client_init_state { + KINSTR_PRFCNT_UNINITIALISED, + KINSTR_PRFCNT_PARSE_SETUP = KINSTR_PRFCNT_UNINITIALISED, + KINSTR_PRFCNT_ENABLE_MAP, + KINSTR_PRFCNT_DUMP_BUFFER, + KINSTR_PRFCNT_SAMPLE_ARRAY, + KINSTR_PRFCNT_VIRTUALIZER_CLIENT, + KINSTR_PRFCNT_WAITQ_MUTEX, + KINSTR_PRFCNT_INITIALISED }; /** @@ -134,9 +155,7 @@ struct kbase_kinstr_prfcnt_async { * @hvcli: Hardware counter virtualizer client. * @node: Node used to attach this client to list in * kinstr_prfcnt context. - * @cmd_sync_lock: Lock coordinating the reader interface for commands - * that need interacting with the async sample dump - * worker thread. + * @cmd_sync_lock: Lock coordinating the reader interface for commands. * @next_dump_time_ns: Time in ns when this client's next periodic dump must * occur. If 0, not a periodic client. * @dump_interval_ns: Interval between periodic dumps. If 0, not a periodic @@ -157,15 +176,10 @@ struct kbase_kinstr_prfcnt_async { * @waitq: Client's notification queue. * @sample_size: Size of the data required for one sample, in bytes. * @sample_count: Number of samples the client is able to capture. - * @sync_sample_count: Number of available spaces for synchronous samples. - * It can differ from sample_count if asynchronous - * sample requests are reserving space in the buffer. 
* @user_data: User data associated with the session. * This is set when the session is started and stopped. * This value is ignored for control commands that * provide another value. - * @async: Asynchronous sampling operations to carry out in this - * client's session. */ struct kbase_kinstr_prfcnt_client { struct kbase_kinstr_prfcnt_context *kinstr_ctx; @@ -186,9 +200,7 @@ struct kbase_kinstr_prfcnt_client { wait_queue_head_t waitq; size_t sample_size; size_t sample_count; - atomic_t sync_sample_count; u64 user_data; - struct kbase_kinstr_prfcnt_async async; }; static struct prfcnt_enum_item kinstr_prfcnt_supported_requests[] = { @@ -423,6 +435,7 @@ int kbasep_kinstr_prfcnt_set_block_meta_items(struct kbase_hwcnt_enable_map *ena size_t grp, blk, blk_inst; struct prfcnt_metadata **ptr_md = block_meta_base; const struct kbase_hwcnt_metadata *metadata; + uint8_t block_idx = 0; if (!dst || !*block_meta_base) return -EINVAL; @@ -431,6 +444,10 @@ int kbasep_kinstr_prfcnt_set_block_meta_items(struct kbase_hwcnt_enable_map *ena kbase_hwcnt_metadata_for_each_block(metadata, grp, blk, blk_inst) { u8 *dst_blk; + /* Block indices must be reported with no gaps. 
*/ + if (blk_inst == 0) + block_idx = 0; + /* Skip unavailable or non-enabled blocks */ if (kbase_kinstr_is_block_type_reserved(metadata, grp, blk) || !kbase_hwcnt_metadata_block_instance_avail(metadata, grp, blk, blk_inst) || @@ -444,13 +461,14 @@ int kbasep_kinstr_prfcnt_set_block_meta_items(struct kbase_hwcnt_enable_map *ena kbase_hwcnt_metadata_block_type_to_prfcnt_block_type( kbase_hwcnt_metadata_block_type(metadata, grp, blk)); - (*ptr_md)->u.block_md.block_idx = (u8)blk_inst; + (*ptr_md)->u.block_md.block_idx = block_idx; (*ptr_md)->u.block_md.set = counter_set; (*ptr_md)->u.block_md.block_state = BLOCK_STATE_UNKNOWN; (*ptr_md)->u.block_md.values_offset = (u32)(dst_blk - base_addr); /* update the buf meta data block pointer to next item */ (*ptr_md)++; + block_idx++; } return 0; @@ -502,33 +520,6 @@ static void kbasep_kinstr_prfcnt_set_sample_metadata( ptr_md->hdr.item_version = 0; } -/** - * kbasep_kinstr_prfcnt_client_output_empty_sample() - Assemble an empty sample - * for output. - * @cli: Non-NULL pointer to a kinstr_prfcnt client. - * @buf_idx: The index to the sample array for saving the sample. - */ -static void kbasep_kinstr_prfcnt_client_output_empty_sample( - struct kbase_kinstr_prfcnt_client *cli, unsigned int buf_idx) -{ - struct kbase_hwcnt_dump_buffer *dump_buf; - struct prfcnt_metadata *ptr_md; - - if (WARN_ON(buf_idx >= cli->sample_arr.sample_count)) - return; - - dump_buf = &cli->sample_arr.samples[buf_idx].dump_buf; - ptr_md = cli->sample_arr.samples[buf_idx].sample_meta; - - kbase_hwcnt_dump_buffer_zero(dump_buf, &cli->enable_map); - - /* Use end timestamp from most recent async dump */ - ptr_md->u.sample_md.timestamp_start = cli->async.ts_end_ns; - ptr_md->u.sample_md.timestamp_end = cli->async.ts_end_ns; - - kbasep_kinstr_prfcnt_set_sample_metadata(cli, dump_buf, ptr_md); -} - /** * kbasep_kinstr_prfcnt_client_output_sample() - Assemble a sample for output. * @cli: Non-NULL pointer to a kinstr_prfcnt client. 
@@ -578,16 +569,11 @@ static void kbasep_kinstr_prfcnt_client_output_sample( * @cli: Non-NULL pointer to a kinstr_prfcnt client. * @event_id: Event type that triggered the dump. * @user_data: User data to return to the user. - * @async_dump: Whether this is an asynchronous dump or not. - * @empty_sample: Sample block data will be 0 if this is true. * * Return: 0 on success, else error code. */ -static int -kbasep_kinstr_prfcnt_client_dump(struct kbase_kinstr_prfcnt_client *cli, - enum base_hwcnt_reader_event event_id, - u64 user_data, bool async_dump, - bool empty_sample) +static int kbasep_kinstr_prfcnt_client_dump(struct kbase_kinstr_prfcnt_client *cli, + enum base_hwcnt_reader_event event_id, u64 user_data) { int ret; u64 ts_start_ns = 0; @@ -605,17 +591,11 @@ kbasep_kinstr_prfcnt_client_dump(struct kbase_kinstr_prfcnt_client *cli, /* Check if there is a place to copy HWC block into. Calculate the * number of available samples count, by taking into account the type * of dump. - * Asynchronous dumps have the ability to reserve space in the samples - * array for future dumps, unlike synchronous dumps. Because of that, - * the samples count for synchronous dumps is managed by a variable - * called sync_sample_count, that originally is defined as equal to the - * size of the whole array but later decreases every time an - * asynchronous dump request is pending and then re-increased every - * time an asynchronous dump request is completed. */ - available_samples_count = async_dump ? 
- cli->sample_arr.sample_count : - atomic_read(&cli->sync_sample_count); + available_samples_count = cli->sample_arr.sample_count; + WARN_ON(available_samples_count < 1); + /* Reserve one slot to store the implicit sample taken on CMD_STOP */ + available_samples_count -= 1; if (write_idx - read_idx == available_samples_count) { /* For periodic sampling, the current active dump * will be accumulated in the next sample, when @@ -631,38 +611,19 @@ kbasep_kinstr_prfcnt_client_dump(struct kbase_kinstr_prfcnt_client *cli, */ write_idx %= cli->sample_arr.sample_count; - if (!empty_sample) { - ret = kbase_hwcnt_virtualizer_client_dump( - cli->hvcli, &ts_start_ns, &ts_end_ns, &cli->tmp_buf); - /* HWC dump error, set the sample with error flag */ - if (ret) - cli->sample_flags |= SAMPLE_FLAG_ERROR; + ret = kbase_hwcnt_virtualizer_client_dump(cli->hvcli, &ts_start_ns, &ts_end_ns, + &cli->tmp_buf); + /* HWC dump error, set the sample with error flag */ + if (ret) + cli->sample_flags |= SAMPLE_FLAG_ERROR; - /* Make the sample ready and copy it to the userspace mapped buffer */ - kbasep_kinstr_prfcnt_client_output_sample( - cli, write_idx, user_data, ts_start_ns, ts_end_ns); - } else { - if (!async_dump) { - struct prfcnt_metadata *ptr_md; - /* User data will not be updated for empty samples. */ - ptr_md = cli->sample_arr.samples[write_idx].sample_meta; - ptr_md->u.sample_md.user_data = user_data; - } - - /* Make the sample ready and copy it to the userspace mapped buffer */ - kbasep_kinstr_prfcnt_client_output_empty_sample(cli, write_idx); - } + /* Make the sample ready and copy it to the userspace mapped buffer */ + kbasep_kinstr_prfcnt_client_output_sample(cli, write_idx, user_data, ts_start_ns, + ts_end_ns); /* Notify client. Make sure all changes to memory are visible. 
*/ wmb(); atomic_inc(&cli->write_idx); - if (async_dump) { - /* Remember the end timestamp of async dump for empty samples */ - if (!empty_sample) - cli->async.ts_end_ns = ts_end_ns; - - atomic_inc(&cli->sync_sample_count); - } wake_up_interruptible(&cli->waitq); /* Reset the flags for the next sample dump */ cli->sample_flags = 0; @@ -676,6 +637,9 @@ kbasep_kinstr_prfcnt_client_start(struct kbase_kinstr_prfcnt_client *cli, { int ret; u64 tm_start, tm_end; + unsigned int write_idx; + unsigned int read_idx; + size_t available_samples_count; WARN_ON(!cli); lockdep_assert_held(&cli->cmd_sync_lock); @@ -684,6 +648,16 @@ kbasep_kinstr_prfcnt_client_start(struct kbase_kinstr_prfcnt_client *cli, if (cli->active) return 0; + write_idx = atomic_read(&cli->write_idx); + read_idx = atomic_read(&cli->read_idx); + + /* Check whether there is space to store atleast an implicit sample + * corresponding to CMD_STOP. + */ + available_samples_count = cli->sample_count - (write_idx - read_idx); + if (!available_samples_count) + return -EBUSY; + kbase_hwcnt_gpu_enable_map_from_physical(&cli->enable_map, &cli->config.phys_em); @@ -696,7 +670,6 @@ kbasep_kinstr_prfcnt_client_start(struct kbase_kinstr_prfcnt_client *cli, cli->hvcli, &cli->enable_map, &tm_start, &tm_end, NULL); if (!ret) { - atomic_set(&cli->sync_sample_count, cli->sample_count); cli->active = true; cli->user_data = user_data; cli->sample_flags = 0; @@ -710,16 +683,6 @@ kbasep_kinstr_prfcnt_client_start(struct kbase_kinstr_prfcnt_client *cli, return ret; } -static int kbasep_kinstr_prfcnt_client_wait_async_done( - struct kbase_kinstr_prfcnt_client *cli) -{ - lockdep_assert_held(&cli->cmd_sync_lock); - - return wait_event_interruptible(cli->waitq, - atomic_read(&cli->sync_sample_count) == - cli->sample_count); -} - static int kbasep_kinstr_prfcnt_client_stop(struct kbase_kinstr_prfcnt_client *cli, u64 user_data) @@ -728,7 +691,7 @@ kbasep_kinstr_prfcnt_client_stop(struct kbase_kinstr_prfcnt_client *cli, u64 tm_start = 0; 
u64 tm_end = 0; struct kbase_hwcnt_physical_enable_map phys_em; - struct kbase_hwcnt_dump_buffer *tmp_buf = NULL; + size_t available_samples_count; unsigned int write_idx; unsigned int read_idx; @@ -739,12 +702,11 @@ kbasep_kinstr_prfcnt_client_stop(struct kbase_kinstr_prfcnt_client *cli, if (!cli->active) return -EINVAL; - /* Wait until pending async sample operation done */ - ret = kbasep_kinstr_prfcnt_client_wait_async_done(cli); - - if (ret < 0) - return -ERESTARTSYS; + mutex_lock(&cli->kinstr_ctx->lock); + /* Disable counters under the lock, so we do not race with the + * sampling thread. + */ phys_em.fe_bm = 0; phys_em.tiler_bm = 0; phys_em.mmu_l2_bm = 0; @@ -752,15 +714,11 @@ kbasep_kinstr_prfcnt_client_stop(struct kbase_kinstr_prfcnt_client *cli, kbase_hwcnt_gpu_enable_map_from_physical(&cli->enable_map, &phys_em); - mutex_lock(&cli->kinstr_ctx->lock); - /* Check whether one has the buffer to hold the last sample */ write_idx = atomic_read(&cli->write_idx); read_idx = atomic_read(&cli->read_idx); - /* Check if there is a place to save the last stop produced sample */ - if (write_idx - read_idx < cli->sample_arr.sample_count) - tmp_buf = &cli->tmp_buf; + available_samples_count = cli->sample_count - (write_idx - read_idx); ret = kbase_hwcnt_virtualizer_client_set_counters(cli->hvcli, &cli->enable_map, @@ -770,7 +728,8 @@ kbasep_kinstr_prfcnt_client_stop(struct kbase_kinstr_prfcnt_client *cli, if (ret) cli->sample_flags |= SAMPLE_FLAG_ERROR; - if (tmp_buf) { + /* There must be a place to save the last stop produced sample */ + if (!WARN_ON(!available_samples_count)) { write_idx %= cli->sample_arr.sample_count; /* Handle the last stop sample */ kbase_hwcnt_gpu_enable_map_from_physical(&cli->enable_map, @@ -800,50 +759,6 @@ kbasep_kinstr_prfcnt_client_sync_dump(struct kbase_kinstr_prfcnt_client *cli, u64 user_data) { int ret; - bool empty_sample = false; - - lockdep_assert_held(&cli->cmd_sync_lock); - - /* If the client is not started, or not manual, the 
command invalid */ - if (!cli->active || cli->dump_interval_ns) - return -EINVAL; - - /* Wait until pending async sample operation done, this is required to - * satisfy the stated sample sequence following their issuing order, - * reflected by the sample start timestamp. - */ - if (atomic_read(&cli->sync_sample_count) != cli->sample_count) { - /* Return empty sample instead of performing real dump. - * As there is an async dump currently in-flight which will - * have the desired information. - */ - empty_sample = true; - ret = kbasep_kinstr_prfcnt_client_wait_async_done(cli); - - if (ret < 0) - return -ERESTARTSYS; - } - - mutex_lock(&cli->kinstr_ctx->lock); - - ret = kbasep_kinstr_prfcnt_client_dump(cli, - BASE_HWCNT_READER_EVENT_MANUAL, - user_data, false, empty_sample); - - mutex_unlock(&cli->kinstr_ctx->lock); - - return ret; -} - -static int -kbasep_kinstr_prfcnt_client_async_dump(struct kbase_kinstr_prfcnt_client *cli, - u64 user_data) -{ - unsigned int write_idx; - unsigned int read_idx; - unsigned int active_async_dumps; - unsigned int new_async_buf_idx; - int ret; lockdep_assert_held(&cli->cmd_sync_lock); @@ -853,45 +768,7 @@ kbasep_kinstr_prfcnt_client_async_dump(struct kbase_kinstr_prfcnt_client *cli, mutex_lock(&cli->kinstr_ctx->lock); - write_idx = atomic_read(&cli->write_idx); - read_idx = atomic_read(&cli->read_idx); - active_async_dumps = - cli->sample_count - atomic_read(&cli->sync_sample_count); - new_async_buf_idx = write_idx + active_async_dumps; - - /* Check if there is a place to copy HWC block into. - * If successful, reserve space in the buffer for the asynchronous - * operation to make sure that it can actually take place. - * Because we reserve space for asynchronous dumps we need to take that - * in consideration here. - */ - ret = (new_async_buf_idx - read_idx == cli->sample_arr.sample_count) ? 
- -EBUSY : - 0; - - if (ret == -EBUSY) { - mutex_unlock(&cli->kinstr_ctx->lock); - return ret; - } - - if (active_async_dumps > 0) { - struct prfcnt_metadata *ptr_md; - unsigned int buf_idx = - new_async_buf_idx % cli->sample_arr.sample_count; - /* Instead of storing user_data, write it directly to future - * empty sample. - */ - ptr_md = cli->sample_arr.samples[buf_idx].sample_meta; - ptr_md->u.sample_md.user_data = user_data; - - atomic_dec(&cli->sync_sample_count); - } else { - cli->async.user_data = user_data; - atomic_dec(&cli->sync_sample_count); - - kbase_hwcnt_virtualizer_queue_work(cli->kinstr_ctx->hvirt, - &cli->async.dump_work); - } + ret = kbasep_kinstr_prfcnt_client_dump(cli, BASE_HWCNT_READER_EVENT_MANUAL, user_data); mutex_unlock(&cli->kinstr_ctx->lock); @@ -948,10 +825,6 @@ int kbasep_kinstr_prfcnt_cmd(struct kbase_kinstr_prfcnt_client *cli, ret = kbasep_kinstr_prfcnt_client_sync_dump( cli, control_cmd->user_data); break; - case PRFCNT_CONTROL_CMD_SAMPLE_ASYNC: - ret = kbasep_kinstr_prfcnt_client_async_dump( - cli, control_cmd->user_data); - break; case PRFCNT_CONTROL_CMD_DISCARD: ret = kbasep_kinstr_prfcnt_client_discard(cli); break; @@ -1006,17 +879,6 @@ kbasep_kinstr_prfcnt_get_sample(struct kbase_kinstr_prfcnt_client *cli, sample_meta = cli->sample_arr.samples[read_idx].sample_meta; sample_offset_bytes = (u8 *)sample_meta - cli->sample_arr.user_buf; - /* Verify that a valid sample has been dumped in the read_idx. - * There are situations where this may not be the case, - * for instance if the client is trying to get an asynchronous - * sample which has not been dumped yet. 
- */ - if (sample_meta->hdr.item_type != PRFCNT_SAMPLE_META_TYPE_SAMPLE || - sample_meta->hdr.item_version != PRFCNT_READER_API_VERSION) { - err = -EINVAL; - goto error_out; - } - sample_access->sequence = sample_meta->u.sample_md.seq; sample_access->sample_offset_bytes = sample_offset_bytes; @@ -1163,19 +1025,46 @@ static void kbasep_kinstr_prfcnt_sample_array_free( memset(sample_arr, 0, sizeof(*sample_arr)); } -void kbasep_kinstr_prfcnt_client_destroy(struct kbase_kinstr_prfcnt_client *cli) +static void +kbasep_kinstr_prfcnt_client_destroy_partial(struct kbase_kinstr_prfcnt_client *cli, + enum kbase_kinstr_prfcnt_client_init_state init_state) { if (!cli) return; - kbase_hwcnt_virtualizer_client_destroy(cli->hvcli); - kbasep_kinstr_prfcnt_sample_array_free(&cli->sample_arr); - kbase_hwcnt_dump_buffer_free(&cli->tmp_buf); - kbase_hwcnt_enable_map_free(&cli->enable_map); - mutex_destroy(&cli->cmd_sync_lock); + while (init_state-- > KINSTR_PRFCNT_UNINITIALISED) { + switch (init_state) { + case KINSTR_PRFCNT_INITIALISED: + /* This shouldn't be reached */ + break; + case KINSTR_PRFCNT_WAITQ_MUTEX: + mutex_destroy(&cli->cmd_sync_lock); + break; + case KINSTR_PRFCNT_VIRTUALIZER_CLIENT: + kbase_hwcnt_virtualizer_client_destroy(cli->hvcli); + break; + case KINSTR_PRFCNT_SAMPLE_ARRAY: + kbasep_kinstr_prfcnt_sample_array_free(&cli->sample_arr); + break; + case KINSTR_PRFCNT_DUMP_BUFFER: + kbase_hwcnt_dump_buffer_free(&cli->tmp_buf); + break; + case KINSTR_PRFCNT_ENABLE_MAP: + kbase_hwcnt_enable_map_free(&cli->enable_map); + break; + case KINSTR_PRFCNT_PARSE_SETUP: + /* Nothing to do here */ + break; + } + } kfree(cli); } +void kbasep_kinstr_prfcnt_client_destroy(struct kbase_kinstr_prfcnt_client *cli) +{ + kbasep_kinstr_prfcnt_client_destroy_partial(cli, KINSTR_PRFCNT_INITIALISED); +} + /** * kbasep_kinstr_prfcnt_hwcnt_reader_release() - hwcnt reader's release. * @inode: Non-NULL pointer to inode structure. 
@@ -1279,9 +1168,8 @@ static void kbasep_kinstr_prfcnt_dump_worker(struct work_struct *work) list_for_each_entry(pos, &kinstr_ctx->clients, node) { if (pos->active && (pos->next_dump_time_ns != 0) && (pos->next_dump_time_ns < cur_time_ns)) - kbasep_kinstr_prfcnt_client_dump( - pos, BASE_HWCNT_READER_EVENT_PERIODIC, - pos->user_data, false, false); + kbasep_kinstr_prfcnt_client_dump(pos, BASE_HWCNT_READER_EVENT_PERIODIC, + pos->user_data); } kbasep_kinstr_prfcnt_reschedule_worker(kinstr_ctx); @@ -1289,48 +1177,6 @@ static void kbasep_kinstr_prfcnt_dump_worker(struct work_struct *work) mutex_unlock(&kinstr_ctx->lock); } -/** - * kbasep_kinstr_prfcnt_async_dump_worker()- Dump worker for a manual client - * to take a single asynchronous - * sample. - * @work: Work structure. - */ -static void kbasep_kinstr_prfcnt_async_dump_worker(struct work_struct *work) -{ - struct kbase_kinstr_prfcnt_async *cli_async = - container_of(work, struct kbase_kinstr_prfcnt_async, dump_work); - struct kbase_kinstr_prfcnt_client *cli = container_of( - cli_async, struct kbase_kinstr_prfcnt_client, async); - - mutex_lock(&cli->kinstr_ctx->lock); - /* While the async operation is in flight, a sync stop might have been - * executed, for which the dump should be skipped. Further as we are - * doing an async dump, we expect that there is reserved buffer for - * this to happen. This is to avoid the rare corner case where the - * user side has issued a stop/start pair before the async work item - * get the chance to execute. - */ - if (cli->active && - (atomic_read(&cli->sync_sample_count) < cli->sample_count)) - kbasep_kinstr_prfcnt_client_dump(cli, - BASE_HWCNT_READER_EVENT_MANUAL, - cli->async.user_data, true, - false); - - /* While the async operation is in flight, more async dump requests - * may have been submitted. In this case, no more async dumps work - * will be queued. 
Instead space will be reserved for that dump and - * an empty sample will be return after handling the current async - * dump. - */ - while (cli->active && - (atomic_read(&cli->sync_sample_count) < cli->sample_count)) { - kbasep_kinstr_prfcnt_client_dump( - cli, BASE_HWCNT_READER_EVENT_MANUAL, 0, true, true); - } - mutex_unlock(&cli->kinstr_ctx->lock); -} - /** * kbasep_kinstr_prfcnt_dump_timer() - Dump timer that schedules the dump worker for * execution as soon as possible. @@ -1790,83 +1636,100 @@ int kbasep_kinstr_prfcnt_client_create(struct kbase_kinstr_prfcnt_context *kinst { int err; struct kbase_kinstr_prfcnt_client *cli; + enum kbase_kinstr_prfcnt_client_init_state init_state; - WARN_ON(!kinstr_ctx); - WARN_ON(!setup); - WARN_ON(!req_arr); + if (WARN_ON(!kinstr_ctx)) + return -EINVAL; + + if (WARN_ON(!setup)) + return -EINVAL; + + if (WARN_ON(!req_arr)) + return -EINVAL; cli = kzalloc(sizeof(*cli), GFP_KERNEL); if (!cli) return -ENOMEM; - cli->kinstr_ctx = kinstr_ctx; - err = kbasep_kinstr_prfcnt_parse_setup(kinstr_ctx, setup, &cli->config, req_arr); + for (init_state = KINSTR_PRFCNT_UNINITIALISED; init_state < KINSTR_PRFCNT_INITIALISED; + init_state++) { + err = 0; + switch (init_state) { + case KINSTR_PRFCNT_PARSE_SETUP: + cli->kinstr_ctx = kinstr_ctx; + err = kbasep_kinstr_prfcnt_parse_setup(kinstr_ctx, setup, &cli->config, + req_arr); - if (err < 0) - goto error; + break; - cli->config.buffer_count = MAX_BUFFER_COUNT; - cli->dump_interval_ns = cli->config.period_ns; - cli->next_dump_time_ns = 0; - cli->active = false; - atomic_set(&cli->write_idx, 0); - atomic_set(&cli->read_idx, 0); - atomic_set(&cli->fetch_idx, 0); + case KINSTR_PRFCNT_ENABLE_MAP: + cli->config.buffer_count = MAX_BUFFER_COUNT; + cli->dump_interval_ns = cli->config.period_ns; + cli->next_dump_time_ns = 0; + cli->active = false; + atomic_set(&cli->write_idx, 0); + atomic_set(&cli->read_idx, 0); + atomic_set(&cli->fetch_idx, 0); - err = kbase_hwcnt_enable_map_alloc(kinstr_ctx->metadata, 
- &cli->enable_map); + err = kbase_hwcnt_enable_map_alloc(kinstr_ctx->metadata, &cli->enable_map); + break; - if (err < 0) - goto error; + case KINSTR_PRFCNT_DUMP_BUFFER: + kbase_hwcnt_gpu_enable_map_from_physical(&cli->enable_map, + &cli->config.phys_em); - kbase_hwcnt_gpu_enable_map_from_physical(&cli->enable_map, &cli->config.phys_em); + cli->sample_count = cli->config.buffer_count; + cli->sample_size = + kbasep_kinstr_prfcnt_get_sample_size(cli, kinstr_ctx->metadata); - cli->sample_count = cli->config.buffer_count; - atomic_set(&cli->sync_sample_count, cli->sample_count); - cli->sample_size = kbasep_kinstr_prfcnt_get_sample_size(cli, kinstr_ctx->metadata); + /* Use virtualizer's metadata to alloc tmp buffer which interacts with + * the HWC virtualizer. + */ + err = kbase_hwcnt_dump_buffer_alloc(kinstr_ctx->metadata, &cli->tmp_buf); + break; - /* Use virtualizer's metadata to alloc tmp buffer which interacts with - * the HWC virtualizer. - */ - err = kbase_hwcnt_dump_buffer_alloc(kinstr_ctx->metadata, - &cli->tmp_buf); + case KINSTR_PRFCNT_SAMPLE_ARRAY: + /* Disable clock map in setup, and enable clock map when start */ + cli->enable_map.clk_enable_map = 0; - if (err < 0) - goto error; + /* Use metadata from virtualizer to allocate dump buffers if + * kinstr_prfcnt doesn't have the truncated metadata. + */ + err = kbasep_kinstr_prfcnt_sample_array_alloc(cli, kinstr_ctx->metadata); - /* Disable clock map in setup, and enable clock map when start */ - cli->enable_map.clk_enable_map = 0; + break; - /* Use metadata from virtualizer to allocate dump buffers if - * kinstr_prfcnt doesn't have the truncated metadata. - */ - err = kbasep_kinstr_prfcnt_sample_array_alloc(cli, kinstr_ctx->metadata); + case KINSTR_PRFCNT_VIRTUALIZER_CLIENT: + /* Set enable map to be 0 to prevent virtualizer to init and kick the + * backend to count. 
+ */ + kbase_hwcnt_gpu_enable_map_from_physical( + &cli->enable_map, &(struct kbase_hwcnt_physical_enable_map){ 0 }); - if (err < 0) - goto error; + err = kbase_hwcnt_virtualizer_client_create(kinstr_ctx->hvirt, + &cli->enable_map, &cli->hvcli); + break; - /* Set enable map to be 0 to prevent virtualizer to init and kick the backend to count */ - kbase_hwcnt_gpu_enable_map_from_physical(&cli->enable_map, - &(struct kbase_hwcnt_physical_enable_map){ 0 }); + case KINSTR_PRFCNT_WAITQ_MUTEX: + init_waitqueue_head(&cli->waitq); + mutex_init(&cli->cmd_sync_lock); + break; - err = kbase_hwcnt_virtualizer_client_create( - kinstr_ctx->hvirt, &cli->enable_map, &cli->hvcli); + case KINSTR_PRFCNT_INITIALISED: + /* This shouldn't be reached */ + break; + } - if (err < 0) - goto error; - - init_waitqueue_head(&cli->waitq); - INIT_WORK(&cli->async.dump_work, - kbasep_kinstr_prfcnt_async_dump_worker); - mutex_init(&cli->cmd_sync_lock); + if (err < 0) { + kbasep_kinstr_prfcnt_client_destroy_partial(cli, init_state); + return err; + } + } *out_vcli = cli; return 0; -error: - kbasep_kinstr_prfcnt_client_destroy(cli); - return err; } static size_t kbasep_kinstr_prfcnt_get_block_info_count( @@ -2085,17 +1948,18 @@ int kbase_kinstr_prfcnt_setup(struct kbase_kinstr_prfcnt_context *kinstr_ctx, union kbase_ioctl_kinstr_prfcnt_setup *setup) { int err; - unsigned int item_count; - unsigned long bytes; - struct prfcnt_request_item *req_arr; + size_t item_count; + size_t bytes; + struct prfcnt_request_item *req_arr = NULL; struct kbase_kinstr_prfcnt_client *cli = NULL; + const size_t max_bytes = 32 * sizeof(*req_arr); if (!kinstr_ctx || !setup) return -EINVAL; item_count = setup->in.request_item_count; - /* Limiting the request items to 2x of the expected: acommodating + /* Limiting the request items to 2x of the expected: accommodating * moderate duplications but rejecting excessive abuses. 
*/ if (!setup->in.requests_ptr || (item_count < 2) || (setup->in.request_item_size == 0) || @@ -2103,7 +1967,18 @@ int kbase_kinstr_prfcnt_setup(struct kbase_kinstr_prfcnt_context *kinstr_ctx, return -EINVAL; } - bytes = item_count * sizeof(*req_arr); + if (check_mul_overflow(item_count, sizeof(*req_arr), &bytes)) + return -EINVAL; + + /* Further limiting the max bytes to copy from userspace by setting it in the following + * fashion: a maximum of 1 mode item, 4 types of 3 sets for a total of 12 enable items, + * each currently at the size of prfcnt_request_item. + * + * Note: if more request types get added, this max limit needs to be updated. + */ + if (bytes > max_bytes) + return -EINVAL; + req_arr = memdup_user(u64_to_user_ptr(setup->in.requests_ptr), bytes); if (IS_ERR(req_arr)) diff --git a/drivers/gpu/arm/bifrost/mali_kbase_linux.h b/drivers/gpu/arm/bifrost/mali_kbase_linux.h index 1d8d1967eee9..e5c6f7a0b217 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_linux.h +++ b/drivers/gpu/arm/bifrost/mali_kbase_linux.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2010-2014, 2020-2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2014, 2020-2022 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -33,7 +33,7 @@ #include #include -#if (defined(MALI_KERNEL_TEST_API) && (1 == MALI_KERNEL_TEST_API)) +#if IS_ENABLED(MALI_KERNEL_TEST_API) #define KBASE_EXPORT_TEST_API(func) EXPORT_SYMBOL(func) #else #define KBASE_EXPORT_TEST_API(func) diff --git a/drivers/gpu/arm/bifrost/mali_kbase_mem.c b/drivers/gpu/arm/bifrost/mali_kbase_mem.c index 3743b4df999f..b18b1e25267e 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_mem.c +++ b/drivers/gpu/arm/bifrost/mali_kbase_mem.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2010-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -803,6 +803,40 @@ static void kbase_region_tracker_ds_init(struct kbase_context *kctx, } #endif /* MALI_USE_CSF */ +static struct kbase_context *kbase_reg_flags_to_kctx(struct kbase_va_region *reg) +{ + struct kbase_context *kctx = NULL; + struct rb_root *rbtree = reg->rbtree; + + switch (reg->flags & KBASE_REG_ZONE_MASK) { + case KBASE_REG_ZONE_CUSTOM_VA: + kctx = container_of(rbtree, struct kbase_context, reg_rbtree_custom); + break; + case KBASE_REG_ZONE_SAME_VA: + kctx = container_of(rbtree, struct kbase_context, reg_rbtree_same); + break; + case KBASE_REG_ZONE_EXEC_VA: + kctx = container_of(rbtree, struct kbase_context, reg_rbtree_exec); + break; +#if MALI_USE_CSF + case KBASE_REG_ZONE_EXEC_FIXED_VA: + kctx = container_of(rbtree, struct kbase_context, reg_rbtree_exec_fixed); + break; + case KBASE_REG_ZONE_FIXED_VA: + kctx = container_of(rbtree, struct kbase_context, reg_rbtree_fixed); + break; + case KBASE_REG_ZONE_MCU_SHARED: + /* This is only expected to be called on driver unload. 
*/ + break; +#endif + default: + WARN(1, "Unknown zone in region: flags=0x%lx\n", reg->flags); + break; + } + + return kctx; +} + static void kbase_region_tracker_erase_rbtree(struct rb_root *rbtree) { struct rb_node *rbnode; @@ -814,6 +848,8 @@ static void kbase_region_tracker_erase_rbtree(struct rb_root *rbtree) rb_erase(rbnode, rbtree); reg = rb_entry(rbnode, struct kbase_va_region, rblink); WARN_ON(reg->va_refcnt != 1); + if (kbase_page_migration_enabled) + kbase_gpu_munmap(kbase_reg_flags_to_kctx(reg), reg); /* Reset the start_pfn - as the rbtree is being * destroyed and we've already erased this region, there * is no further need to attempt to remove it. @@ -830,6 +866,10 @@ static void kbase_region_tracker_erase_rbtree(struct rb_root *rbtree) void kbase_region_tracker_term(struct kbase_context *kctx) { + WARN(kctx->as_nr != KBASEP_AS_NR_INVALID, + "kctx-%d_%d must first be scheduled out to flush GPU caches+tlbs before erasing remaining regions", + kctx->tgid, kctx->id); + kbase_gpu_vm_lock(kctx); kbase_region_tracker_erase_rbtree(&kctx->reg_rbtree_same); kbase_region_tracker_erase_rbtree(&kctx->reg_rbtree_custom); @@ -1554,6 +1594,7 @@ struct kbase_va_region *kbase_alloc_free_region(struct rb_root *rbtree, return NULL; new_reg->va_refcnt = 1; + new_reg->no_user_free_refcnt = 0; new_reg->cpu_alloc = NULL; /* no alloc bound yet */ new_reg->gpu_alloc = NULL; /* no alloc bound yet */ new_reg->rbtree = rbtree; @@ -1572,41 +1613,6 @@ struct kbase_va_region *kbase_alloc_free_region(struct rb_root *rbtree, KBASE_EXPORT_TEST_API(kbase_alloc_free_region); -static struct kbase_context *kbase_reg_flags_to_kctx( - struct kbase_va_region *reg) -{ - struct kbase_context *kctx = NULL; - struct rb_root *rbtree = reg->rbtree; - - switch (reg->flags & KBASE_REG_ZONE_MASK) { - case KBASE_REG_ZONE_CUSTOM_VA: - kctx = container_of(rbtree, struct kbase_context, - reg_rbtree_custom); - break; - case KBASE_REG_ZONE_SAME_VA: - kctx = container_of(rbtree, struct kbase_context, - 
reg_rbtree_same); - break; - case KBASE_REG_ZONE_EXEC_VA: - kctx = container_of(rbtree, struct kbase_context, - reg_rbtree_exec); - break; -#if MALI_USE_CSF - case KBASE_REG_ZONE_EXEC_FIXED_VA: - kctx = container_of(rbtree, struct kbase_context, reg_rbtree_exec_fixed); - break; - case KBASE_REG_ZONE_FIXED_VA: - kctx = container_of(rbtree, struct kbase_context, reg_rbtree_fixed); - break; -#endif - default: - WARN(1, "Unknown zone in region: flags=0x%lx\n", reg->flags); - break; - } - - return kctx; -} - /** * kbase_free_alloced_region - Free a region object. * @@ -1720,6 +1726,7 @@ int kbase_gpu_mmap(struct kbase_context *kctx, struct kbase_va_region *reg, unsigned long gwt_mask = ~0; int group_id; struct kbase_mem_phy_alloc *alloc; + bool ignore_page_migration = false; #ifdef CONFIG_MALI_CINSTR_GWT if (kctx->gwt_enabled) @@ -1749,15 +1756,12 @@ int kbase_gpu_mmap(struct kbase_context *kctx, struct kbase_va_region *reg, for (i = 0; i < alloc->imported.alias.nents; i++) { if (alloc->imported.alias.aliased[i].alloc) { err = kbase_mmu_insert_pages( - kctx->kbdev, &kctx->mmu, - reg->start_pfn + (i * stride), - alloc->imported.alias.aliased[i] - .alloc->pages + - alloc->imported.alias.aliased[i] - .offset, + kctx->kbdev, &kctx->mmu, reg->start_pfn + (i * stride), + alloc->imported.alias.aliased[i].alloc->pages + + alloc->imported.alias.aliased[i].offset, alloc->imported.alias.aliased[i].length, - reg->flags & gwt_mask, kctx->as_nr, - group_id, mmu_sync_info); + reg->flags & gwt_mask, kctx->as_nr, group_id, mmu_sync_info, + NULL, ignore_page_migration); if (err) goto bad_insert; @@ -1777,12 +1781,15 @@ int kbase_gpu_mmap(struct kbase_context *kctx, struct kbase_va_region *reg, } } } else { - err = kbase_mmu_insert_pages(kctx->kbdev, &kctx->mmu, - reg->start_pfn, + if (reg->gpu_alloc->type == KBASE_MEM_TYPE_IMPORTED_UMM || + reg->gpu_alloc->type == KBASE_MEM_TYPE_IMPORTED_USER_BUF) + ignore_page_migration = true; + + err = kbase_mmu_insert_pages(kctx->kbdev, &kctx->mmu, 
reg->start_pfn, kbase_get_gpu_phy_pages(reg), kbase_reg_current_backed_size(reg), - reg->flags & gwt_mask, kctx->as_nr, - group_id, mmu_sync_info); + reg->flags & gwt_mask, kctx->as_nr, group_id, + mmu_sync_info, reg, ignore_page_migration); if (err) goto bad_insert; kbase_mem_phy_alloc_gpu_mapped(alloc); @@ -1816,7 +1823,7 @@ int kbase_gpu_mmap(struct kbase_context *kctx, struct kbase_va_region *reg, bad_insert: kbase_mmu_teardown_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn, alloc->pages, - reg->nr_pages, kctx->as_nr); + reg->nr_pages, kctx->as_nr, ignore_page_migration); kbase_remove_va_region(kctx->kbdev, reg); @@ -1845,7 +1852,6 @@ int kbase_gpu_munmap(struct kbase_context *kctx, struct kbase_va_region *reg) switch (alloc->type) { case KBASE_MEM_TYPE_ALIAS: { size_t i = 0; - /* Due to the way the number of valid PTEs and ATEs are tracked * currently, only the GPU virtual range that is backed & mapped * should be passed to the kbase_mmu_teardown_pages() function, @@ -1853,27 +1859,37 @@ int kbase_gpu_munmap(struct kbase_context *kctx, struct kbase_va_region *reg) * separately. 
*/ for (i = 0; i < alloc->imported.alias.nents; i++) { - if (alloc->imported.alias.aliased[i].alloc) { - int err_loop = kbase_mmu_teardown_pages( - kctx->kbdev, &kctx->mmu, - reg->start_pfn + (i * alloc->imported.alias.stride), - alloc->pages + (i * alloc->imported.alias.stride), - alloc->imported.alias.aliased[i].length, - kctx->as_nr); - if (WARN_ON_ONCE(err_loop)) - err = err_loop; - } + struct tagged_addr *phys_alloc = NULL; + int err_loop; + + if (alloc->imported.alias.aliased[i].alloc != NULL) + phys_alloc = alloc->imported.alias.aliased[i].alloc->pages + + alloc->imported.alias.aliased[i].offset; + + err_loop = kbase_mmu_teardown_pages( + kctx->kbdev, &kctx->mmu, + reg->start_pfn + (i * alloc->imported.alias.stride), + phys_alloc, alloc->imported.alias.aliased[i].length, + kctx->as_nr, false); + + if (WARN_ON_ONCE(err_loop)) + err = err_loop; } } break; case KBASE_MEM_TYPE_IMPORTED_UMM: err = kbase_mmu_teardown_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn, - alloc->pages, reg->nr_pages, kctx->as_nr); + alloc->pages, reg->nr_pages, kctx->as_nr, true); + break; + case KBASE_MEM_TYPE_IMPORTED_USER_BUF: + err = kbase_mmu_teardown_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn, + alloc->pages, kbase_reg_current_backed_size(reg), + kctx->as_nr, true); break; default: err = kbase_mmu_teardown_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn, alloc->pages, kbase_reg_current_backed_size(reg), - kctx->as_nr); + kctx->as_nr, false); break; } @@ -2046,6 +2062,7 @@ void kbase_sync_single(struct kbase_context *kctx, src = ((unsigned char *)kmap(gpu_page)) + offset; dst = ((unsigned char *)kmap(cpu_page)) + offset; } + memcpy(dst, src, size); kunmap(gpu_page); kunmap(cpu_page); @@ -2197,7 +2214,7 @@ int kbase_mem_free_region(struct kbase_context *kctx, struct kbase_va_region *re __func__, (void *)reg, (void *)kctx); lockdep_assert_held(&kctx->reg_lock); - if (reg->flags & KBASE_REG_NO_USER_FREE) { + if (kbase_va_region_is_no_user_free(kctx, reg)) { dev_warn(kctx->kbdev->dev, 
"Attempt to free GPU memory whose freeing by user space is forbidden!\n"); return -EINVAL; } @@ -2416,8 +2433,11 @@ int kbase_update_region_flags(struct kbase_context *kctx, if (flags & BASEP_MEM_PERMANENT_KERNEL_MAPPING) reg->flags |= KBASE_REG_PERMANENT_KERNEL_MAPPING; - if (flags & BASEP_MEM_NO_USER_FREE) - reg->flags |= KBASE_REG_NO_USER_FREE; + if (flags & BASEP_MEM_NO_USER_FREE) { + kbase_gpu_vm_lock(kctx); + kbase_va_region_no_user_free_get(kctx, reg); + kbase_gpu_vm_unlock(kctx); + } if (flags & BASE_MEM_GPU_VA_SAME_4GB_PAGE) reg->flags |= KBASE_REG_GPU_VA_SAME_4GB_PAGE; @@ -3206,9 +3226,32 @@ out_rollback: out_term: return -1; } - KBASE_EXPORT_TEST_API(kbase_alloc_phy_pages); +void kbase_set_phy_alloc_page_status(struct kbase_mem_phy_alloc *alloc, + enum kbase_page_status status) +{ + u32 i = 0; + + for (; i < alloc->nents; i++) { + struct tagged_addr phys = alloc->pages[i]; + struct kbase_page_metadata *page_md = kbase_page_private(as_page(phys)); + + /* Skip the 4KB page that is part of a large page, as the large page is + * excluded from the migration process. + */ + if (is_huge(phys) || is_partial(phys)) + continue; + + if (!page_md) + continue; + + spin_lock(&page_md->migrate_lock); + page_md->status = PAGE_STATUS_SET(page_md->status, (u8)status); + spin_unlock(&page_md->migrate_lock); + } +} + bool kbase_check_alloc_flags(unsigned long flags) { /* Only known input flags should be set. */ @@ -3766,7 +3809,15 @@ static void kbase_jit_destroy_worker(struct work_struct *work) mutex_unlock(&kctx->jit_evict_lock); kbase_gpu_vm_lock(kctx); - reg->flags &= ~KBASE_REG_NO_USER_FREE; + + /* + * Incrementing the refcount is prevented on JIT regions. + * If/when this ever changes we would need to compensate + * by implementing "free on putting the last reference", + * but only for JIT regions. 
+ */ + WARN_ON(reg->no_user_free_refcnt > 1); + kbase_va_region_no_user_free_put(kctx, reg); kbase_mem_free_region(kctx, reg); kbase_gpu_vm_unlock(kctx); } while (1); @@ -4419,7 +4470,7 @@ struct kbase_va_region *kbase_jit_allocate(struct kbase_context *kctx, if (ret < 0) { /* * An update to an allocation from the pool failed, - * chances are slim a new allocation would fair any + * chances are slim a new allocation would fare any * better so return the allocation to the pool and * return the function with failure. */ @@ -4441,6 +4492,17 @@ struct kbase_va_region *kbase_jit_allocate(struct kbase_context *kctx, mutex_unlock(&kctx->jit_evict_lock); reg = NULL; goto end; + } else { + /* A suitable JIT allocation existed on the evict list, so we need + * to make sure that the NOT_MOVABLE property is cleared. + */ + if (kbase_page_migration_enabled) { + kbase_gpu_vm_lock(kctx); + mutex_lock(&kctx->jit_evict_lock); + kbase_set_phy_alloc_page_status(reg->gpu_alloc, ALLOCATED_MAPPED); + mutex_unlock(&kctx->jit_evict_lock); + kbase_gpu_vm_unlock(kctx); + } } } else { /* No suitable JIT allocation was found so create a new one */ @@ -4497,6 +4559,29 @@ struct kbase_va_region *kbase_jit_allocate(struct kbase_context *kctx, } } + /* Similarly to tiler heap init, there is a short window of time + * where the (either recycled or newly allocated, in our case) region has + * "no user free" refcount incremented but is still missing the DONT_NEED flag, and + * doesn't yet have the ACTIVE_JIT_ALLOC flag either. Temporarily leaking the + * allocation is the least bad option that doesn't lead to a security issue down the + * line (it will eventually be cleaned up during context termination). + * + * We also need to call kbase_gpu_vm_lock regardless, as we're updating the region + * flags. 
+ */ + kbase_gpu_vm_lock(kctx); + if (unlikely(reg->no_user_free_refcnt > 1)) { + kbase_gpu_vm_unlock(kctx); + dev_err(kctx->kbdev->dev, "JIT region has no_user_free_refcnt > 1!\n"); + + mutex_lock(&kctx->jit_evict_lock); + list_move(&reg->jit_node, &kctx->jit_pool_head); + mutex_unlock(&kctx->jit_evict_lock); + + reg = NULL; + goto end; + } + trace_mali_jit_alloc(reg, info->id); kctx->jit_current_allocations++; @@ -4514,6 +4599,7 @@ struct kbase_va_region *kbase_jit_allocate(struct kbase_context *kctx, kbase_jit_report_update_pressure(kctx, reg, info->va_pages, KBASE_JIT_REPORT_ON_ALLOC_OR_FREE); #endif /* MALI_JIT_PRESSURE_LIMIT_BASE */ + kbase_gpu_vm_unlock(kctx); end: for (i = 0; i != ARRAY_SIZE(prealloc_sas); ++i) @@ -4584,6 +4670,12 @@ void kbase_jit_free(struct kbase_context *kctx, struct kbase_va_region *reg) list_move(&reg->jit_node, &kctx->jit_pool_head); + /* Inactive JIT regions should be freed by the shrinker and not impacted + * by page migration. Once freed, they will enter into the page migration + * state machine via the mempools. + */ + if (kbase_page_migration_enabled) + kbase_set_phy_alloc_page_status(reg->gpu_alloc, NOT_MOVABLE); mutex_unlock(&kctx->jit_evict_lock); } @@ -4630,7 +4722,14 @@ bool kbase_jit_evict(struct kbase_context *kctx) mutex_unlock(&kctx->jit_evict_lock); if (reg) { - reg->flags &= ~KBASE_REG_NO_USER_FREE; + /* + * Incrementing the refcount is prevented on JIT regions. + * If/when this ever changes we would need to compensate + * by implementing "free on putting the last reference", + * but only for JIT regions. + */ + WARN_ON(reg->no_user_free_refcnt > 1); + kbase_va_region_no_user_free_put(kctx, reg); kbase_mem_free_region(kctx, reg); } @@ -4652,7 +4751,14 @@ void kbase_jit_term(struct kbase_context *kctx) list_del(&walker->jit_node); list_del_init(&walker->gpu_alloc->evict_node); mutex_unlock(&kctx->jit_evict_lock); - walker->flags &= ~KBASE_REG_NO_USER_FREE; + /* + * Incrementing the refcount is prevented on JIT regions.
+ * If/when this ever changes we would need to compensate + * by implementing "free on putting the last reference", + * but only for JIT regions. + */ + WARN_ON(walker->no_user_free_refcnt > 1); + kbase_va_region_no_user_free_put(kctx, walker); kbase_mem_free_region(kctx, walker); mutex_lock(&kctx->jit_evict_lock); } @@ -4664,7 +4770,14 @@ void kbase_jit_term(struct kbase_context *kctx) list_del(&walker->jit_node); list_del_init(&walker->gpu_alloc->evict_node); mutex_unlock(&kctx->jit_evict_lock); - walker->flags &= ~KBASE_REG_NO_USER_FREE; + /* + * Incrementing the refcount is prevented on JIT regions. + * If/when this ever changes we would need to compensate + * by implementing "free on putting the last reference", + * but only for JIT regions. + */ + WARN_ON(walker->no_user_free_refcnt > 1); + kbase_va_region_no_user_free_put(kctx, walker); kbase_mem_free_region(kctx, walker); mutex_lock(&kctx->jit_evict_lock); } @@ -4873,10 +4986,7 @@ static int kbase_jd_user_buf_map(struct kbase_context *kctx, struct page **pages; struct tagged_addr *pa; long i, dma_mapped_pages; - unsigned long address; struct device *dev; - unsigned long offset_within_page; - unsigned long remaining_size; unsigned long gwt_mask = ~0; /* Calls to this function are inherently asynchronous, with respect to * MMU operations. @@ -4892,19 +5002,29 @@ static int kbase_jd_user_buf_map(struct kbase_context *kctx, alloc = reg->gpu_alloc; pa = kbase_get_gpu_phy_pages(reg); - address = alloc->imported.user_buf.address; pinned_pages = alloc->nents; pages = alloc->imported.user_buf.pages; dev = kctx->kbdev->dev; - offset_within_page = address & ~PAGE_MASK; - remaining_size = alloc->imported.user_buf.size; + /* Manual CPU cache synchronization. + * + * The driver disables automatic CPU cache synchronization because the + * memory pages that enclose the imported region may also contain + * sub-regions which are not imported and that are allocated and used + * by the user process. 
This may be the case of memory at the beginning + * of the first page and at the end of the last page. Automatic CPU cache + * synchronization would force some operations on those memory allocations, + * unbeknown to the user process: in particular, a CPU cache invalidate + * upon unmapping would destroy the content of dirty CPU caches and cause + * the user process to lose CPU writes to the non-imported sub-regions. + * + * When the GPU claims ownership of the imported memory buffer, it shall + * commit CPU writes for the whole of all pages that enclose the imported + * region, otherwise the initial content of memory would be wrong. + */ for (i = 0; i < pinned_pages; i++) { - unsigned long map_size = - MIN(PAGE_SIZE - offset_within_page, remaining_size); - dma_addr_t dma_addr = dma_map_page(dev, pages[i], - offset_within_page, map_size, - DMA_BIDIRECTIONAL); + dma_addr_t dma_addr = dma_map_page_attrs(dev, pages[i], 0, PAGE_SIZE, + DMA_BIDIRECTIONAL, DMA_ATTR_SKIP_CPU_SYNC); err = dma_mapping_error(dev, dma_addr); if (err) @@ -4913,8 +5033,7 @@ static int kbase_jd_user_buf_map(struct kbase_context *kctx, alloc->imported.user_buf.dma_addrs[i] = dma_addr; pa[i] = as_tagged(page_to_phys(pages[i])); - remaining_size -= map_size; - offset_within_page = 0; + dma_sync_single_for_device(dev, dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL); } #ifdef CONFIG_MALI_CINSTR_GWT @@ -4922,29 +5041,31 @@ static int kbase_jd_user_buf_map(struct kbase_context *kctx, gwt_mask = ~KBASE_REG_GPU_WR; #endif - err = kbase_mmu_insert_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn, - pa, kbase_reg_current_backed_size(reg), - reg->flags & gwt_mask, kctx->as_nr, - alloc->group_id, mmu_sync_info); + err = kbase_mmu_insert_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn, pa, + kbase_reg_current_backed_size(reg), reg->flags & gwt_mask, + kctx->as_nr, alloc->group_id, mmu_sync_info, NULL, true); if (err == 0) return 0; /* fall down */ unwind: alloc->nents = 0; - offset_within_page = address & ~PAGE_MASK; - 
remaining_size = alloc->imported.user_buf.size; dma_mapped_pages = i; - /* Run the unmap loop in the same order as map loop */ + /* Run the unmap loop in the same order as map loop, and perform again + * CPU cache synchronization to re-write the content of dirty CPU caches + * to memory. This is precautionary measure in case a GPU job has taken + * advantage of a partially GPU-mapped range to write and corrupt the + * content of memory, either inside or outside the imported region. + * + * Notice that this error recovery path doesn't try to be optimal and just + * flushes the entire page range. + */ for (i = 0; i < dma_mapped_pages; i++) { - unsigned long unmap_size = - MIN(PAGE_SIZE - offset_within_page, remaining_size); + dma_addr_t dma_addr = alloc->imported.user_buf.dma_addrs[i]; - dma_unmap_page(kctx->kbdev->dev, - alloc->imported.user_buf.dma_addrs[i], - unmap_size, DMA_BIDIRECTIONAL); - remaining_size -= unmap_size; - offset_within_page = 0; + dma_sync_single_for_device(dev, dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL); + dma_unmap_page_attrs(dev, dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL, + DMA_ATTR_SKIP_CPU_SYNC); } /* The user buffer could already have been previously pinned before @@ -4985,12 +5106,85 @@ static void kbase_jd_user_buf_unmap(struct kbase_context *kctx, struct kbase_mem #endif for (i = 0; i < alloc->imported.user_buf.nr_pages; i++) { - unsigned long unmap_size = - MIN(remaining_size, PAGE_SIZE - offset_within_page); + unsigned long imported_size = MIN(remaining_size, PAGE_SIZE - offset_within_page); + /* Notice: this is a temporary variable that is used for DMA sync + * operations, and that could be incremented by an offset if the + * current page contains both imported and non-imported memory + * sub-regions. + * + * It is valid to add an offset to this value, because the offset + * is always kept within the physically contiguous dma-mapped range + * and there's no need to translate to physical address to offset it. 
+ * + * This variable is not going to be used for the actual DMA unmap + * operation, that shall always use the original DMA address of the + * whole memory page. + */ dma_addr_t dma_addr = alloc->imported.user_buf.dma_addrs[i]; - dma_unmap_page(kctx->kbdev->dev, dma_addr, unmap_size, - DMA_BIDIRECTIONAL); + /* Manual CPU cache synchronization. + * + * When the GPU returns ownership of the buffer to the CPU, the driver + * needs to treat imported and non-imported memory differently. + * + * The first case to consider is non-imported sub-regions at the + * beginning of the first page and at the end of last page. For these + * sub-regions: CPU cache shall be committed with a clean+invalidate, + * in order to keep the last CPU write. + * + * Imported region prefers the opposite treatment: this memory has been + * legitimately mapped and used by the GPU, hence GPU writes shall be + * committed to memory, while CPU cache shall be invalidated to make + * sure that CPU reads the correct memory content. + * + * The following diagram shows the expected value of the variables + * used in this loop in the corner case of an imported region enclosed + * by a single memory page: + * + * page boundary ->|---------- | <- dma_addr (initial value) + * | | + * | - - - - - | <- offset_within_page + * |XXXXXXXXXXX|\ + * |XXXXXXXXXXX| \ + * |XXXXXXXXXXX| }- imported_size + * |XXXXXXXXXXX| / + * |XXXXXXXXXXX|/ + * | - - - - - | <- offset_within_page + imported_size + * | |\ + * | | }- PAGE_SIZE - imported_size - offset_within_page + * | |/ + * page boundary ->|-----------| + * + * If the imported region is enclosed by more than one page, then + * offset_within_page = 0 for any page after the first. + */ + + /* Only for first page: handle non-imported range at the beginning. */ + if (offset_within_page > 0) { + dma_sync_single_for_device(kctx->kbdev->dev, dma_addr, offset_within_page, + DMA_BIDIRECTIONAL); + dma_addr += offset_within_page; + } + + /* For every page: handle imported range.
*/ + if (imported_size > 0) + dma_sync_single_for_cpu(kctx->kbdev->dev, dma_addr, imported_size, + DMA_BIDIRECTIONAL); + + /* Only for last page (that may coincide with first page): + * handle non-imported range at the end. + */ + if ((imported_size + offset_within_page) < PAGE_SIZE) { + dma_addr += imported_size; + dma_sync_single_for_device(kctx->kbdev->dev, dma_addr, + PAGE_SIZE - imported_size - offset_within_page, + DMA_BIDIRECTIONAL); + } + + /* Notice: use the original DMA address to unmap the whole memory page. */ + dma_unmap_page_attrs(kctx->kbdev->dev, alloc->imported.user_buf.dma_addrs[i], + PAGE_SIZE, DMA_BIDIRECTIONAL, DMA_ATTR_SKIP_CPU_SYNC); + if (writeable) set_page_dirty_lock(pages[i]); #if !MALI_USE_CSF @@ -4998,7 +5192,7 @@ static void kbase_jd_user_buf_unmap(struct kbase_context *kctx, struct kbase_mem pages[i] = NULL; #endif - remaining_size -= unmap_size; + remaining_size -= imported_size; offset_within_page = 0; } #if !MALI_USE_CSF @@ -5079,8 +5273,9 @@ int kbase_map_external_resource(struct kbase_context *kctx, struct kbase_va_regi break; } default: - WARN(1, "Invalid external resource GPU allocation type (%x) on mapping", - alloc->type); + dev_dbg(kctx->kbdev->dev, + "Invalid external resource GPU allocation type (%x) on mapping", + alloc->type); return -EINVAL; } @@ -5113,7 +5308,7 @@ void kbase_unmap_external_resource(struct kbase_context *kctx, struct kbase_va_r kbase_mmu_teardown_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn, alloc->pages, kbase_reg_current_backed_size(reg), - kctx->as_nr); + kctx->as_nr, true); } if ((reg->flags & (KBASE_REG_CPU_WR | KBASE_REG_GPU_WR)) == 0) diff --git a/drivers/gpu/arm/bifrost/mali_kbase_mem.h b/drivers/gpu/arm/bifrost/mali_kbase_mem.h index 5820f6d8a556..f0f5f92c793c 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_mem.h +++ b/drivers/gpu/arm/bifrost/mali_kbase_mem.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2010-2022 ARM Limited. 
All rights reserved. + * (C) COPYRIGHT 2010-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -193,10 +193,11 @@ struct kbase_mem_phy_alloc { * @SPILL_IN_PROGRESS: Transitory state. Corner case where pages in a memory * pool of a dying context are being moved to the device * memory pool. + * @NOT_MOVABLE: Stable state. Page has been allocated for an object that is + * not movable, but may return to be movable when the object + * is freed. * @ALLOCATED_MAPPED: Stable state. Page has been allocated, mapped to GPU * and has reference to kbase_mem_phy_alloc object. - * @MULTI_MAPPED: Stable state. This state is used to manage all use cases - * where a page may have "unusual" mappings. * @PT_MAPPED: Stable state. Similar to ALLOCATED_MAPPED, but page doesn't * reference kbase_mem_phy_alloc object. Used as a page in MMU * page table. @@ -205,9 +206,11 @@ struct kbase_mem_phy_alloc { * unmapping it. This status means that a memory release is * happening and it's still not complete. * @FREE_ISOLATED_IN_PROGRESS: Transitory state. This is a very particular corner case. - * A page is isolated while it is in ALLOCATED_MAPPED or - * PT_MAPPED state, but then the driver tries to destroy the - * allocation. + * A page is isolated while it is in ALLOCATED_MAPPED state, + * but then the driver tries to destroy the allocation. + * @FREE_PT_ISOLATED_IN_PROGRESS: Transitory state. This is a very particular corner case. + * A page is isolated while it is in PT_MAPPED state, but + * then the driver tries to destroy the allocation. * * Pages can only be migrated in stable states. 
*/ @@ -215,23 +218,32 @@ enum kbase_page_status { MEM_POOL = 0, ALLOCATE_IN_PROGRESS, SPILL_IN_PROGRESS, + NOT_MOVABLE, ALLOCATED_MAPPED, - MULTI_MAPPED, PT_MAPPED, FREE_IN_PROGRESS, FREE_ISOLATED_IN_PROGRESS, + FREE_PT_ISOLATED_IN_PROGRESS, }; +#define PGD_VPFN_LEVEL_MASK ((u64)0x3) +#define PGD_VPFN_LEVEL_GET_LEVEL(pgd_vpfn_level) (pgd_vpfn_level & PGD_VPFN_LEVEL_MASK) +#define PGD_VPFN_LEVEL_GET_VPFN(pgd_vpfn_level) (pgd_vpfn_level & ~PGD_VPFN_LEVEL_MASK) +#define PGD_VPFN_LEVEL_SET(pgd_vpfn, level) \ + ((pgd_vpfn & ~PGD_VPFN_LEVEL_MASK) | (level & PGD_VPFN_LEVEL_MASK)) + /** * struct kbase_page_metadata - Metadata for each page in kbase * * @kbdev: Pointer to kbase device. * @dma_addr: DMA address mapped to page. * @migrate_lock: A spinlock to protect the private metadata. + * @data: Member in union valid based on @status. * @status: Status to keep track if page can be migrated at any * given moment. MSB will indicate if page is isolated. * Protected by @migrate_lock. - * @data: Member in union valid based on @status. + * @vmap_count: Counter of kernel mappings. + * @group_id: Memory group ID obtained at the time of page allocation. * * Each 4KB page will have a reference to this struct in the private field. 
* This will be used to keep track of information required for Linux page @@ -240,7 +252,6 @@ enum kbase_page_status { struct kbase_page_metadata { dma_addr_t dma_addr; spinlock_t migrate_lock; - u8 status; union { struct { @@ -251,19 +262,25 @@ struct kbase_page_metadata { struct kbase_device *kbdev; } mem_pool; struct { - struct kbase_mem_phy_alloc *phy_alloc; struct kbase_va_region *reg; struct kbase_mmu_table *mmut; - struct page *pgd; u64 vpfn; - size_t page_array_index; } mapped; struct { struct kbase_mmu_table *mmut; - struct page *pgd; - u16 entry_info; + u64 pgd_vpfn_level; } pt_mapped; + struct { + struct kbase_device *kbdev; + } free_isolated; + struct { + struct kbase_device *kbdev; + } free_pt_isolated; } data; + + u8 status; + u8 vmap_count; + u8 group_id; }; /* The top bit of kbase_alloc_import_user_buf::current_mapping_usage_count is @@ -288,6 +305,20 @@ enum kbase_jit_report_flags { KBASE_JIT_REPORT_ON_ALLOC_OR_FREE = (1u << 0) }; +/** + * kbase_set_phy_alloc_page_status - Set the page migration status of the underlying + * physical allocation. + * @alloc: the physical allocation containing the pages whose metadata is going + * to be modified + * @status: the status the pages should end up in + * + * Note that this function does not go through all of the checking to ensure that + * proper states are set. Instead, it is only used when we change the allocation + * to NOT_MOVABLE or from NOT_MOVABLE to ALLOCATED_MAPPED + */ +void kbase_set_phy_alloc_page_status(struct kbase_mem_phy_alloc *alloc, + enum kbase_page_status status); + static inline void kbase_mem_phy_alloc_gpu_mapped(struct kbase_mem_phy_alloc *alloc) { KBASE_DEBUG_ASSERT(alloc); @@ -388,6 +419,8 @@ static inline struct kbase_mem_phy_alloc *kbase_mem_phy_alloc_put(struct kbase_m * @jit_usage_id: The last just-in-time memory usage ID for this region. * @jit_bin_id: The just-in-time memory bin this region came from. * @va_refcnt: Number of users of this region. Protected by reg_lock. 
+ * @no_user_free_refcnt: Number of users that want to prevent the region from + * being freed by userspace. * @heap_info_gpu_addr: Pointer to an object in GPU memory defining an end of * an allocated region * The object can be one of: @@ -508,10 +541,7 @@ struct kbase_va_region { #define KBASE_REG_RESERVED_BIT_23 (1ul << 23) #endif /* !MALI_USE_CSF */ -/* Whilst this flag is set the GPU allocation is not supposed to be freed by - * user space. The flag will remain set for the lifetime of JIT allocations. - */ -#define KBASE_REG_NO_USER_FREE (1ul << 24) +/* Bit 24 is currently unused and is available for use for a new flag */ /* Memory has permanent kernel side mapping */ #define KBASE_REG_PERMANENT_KERNEL_MAPPING (1ul << 25) @@ -652,6 +682,7 @@ struct kbase_va_region { #endif /* MALI_JIT_PRESSURE_LIMIT_BASE */ int va_refcnt; + int no_user_free_refcnt; }; /** @@ -694,6 +725,23 @@ static inline bool kbase_is_region_invalid_or_free(struct kbase_va_region *reg) return (kbase_is_region_invalid(reg) || kbase_is_region_free(reg)); } +/** + * kbase_is_region_shrinkable - Check if a region is "shrinkable". + * A shrinkable region is a region whose backing pages (reg->gpu_alloc->pages) + * can be freed at any point, even though the kbase_va_region structure itself + * may have been refcounted. + * Regions that aren't on a shrinker, but could be shrunk at any point in future + * without warning are still considered "shrinkable" (e.g. Active JIT allocs) + * + * @reg: Pointer to region + * + * Return: true if the region is "shrinkable", false if not.
+ */ +static inline bool kbase_is_region_shrinkable(struct kbase_va_region *reg) +{ + return (reg->flags & KBASE_REG_DONT_NEED) || (reg->flags & KBASE_REG_ACTIVE_JIT_ALLOC); +} + void kbase_remove_va_region(struct kbase_device *kbdev, struct kbase_va_region *reg); static inline void kbase_region_refcnt_free(struct kbase_device *kbdev, @@ -714,6 +762,7 @@ static inline struct kbase_va_region *kbase_va_region_alloc_get( lockdep_assert_held(&kctx->reg_lock); WARN_ON(!region->va_refcnt); + WARN_ON(region->va_refcnt == INT_MAX); /* non-atomic as kctx->reg_lock is held */ dev_dbg(kctx->kbdev->dev, "va_refcnt %d before get %pK\n", @@ -741,6 +790,69 @@ static inline struct kbase_va_region *kbase_va_region_alloc_put( return NULL; } +/** + * kbase_va_region_is_no_user_free - Check if user free is forbidden for the region. + * A region that must not be freed by userspace indicates that it is owned by some other + * kbase subsystem, for example tiler heaps, JIT memory or CSF queues. + * Such regions must not be shrunk (i.e. have their backing pages freed), except by the + * current owner. + * Hence, callers cannot rely on this check alone to determine if a region might be shrunk + * by any part of kbase. Instead they should use kbase_is_region_shrinkable(). + * + * @kctx: Pointer to kbase context. + * @region: Pointer to region. + * + * Return: true if userspace cannot free the region, false if userspace can free the region. + */ +static inline bool kbase_va_region_is_no_user_free(struct kbase_context *kctx, + struct kbase_va_region *region) +{ + lockdep_assert_held(&kctx->reg_lock); + return region->no_user_free_refcnt > 0; +} + +/** + * kbase_va_region_no_user_free_get - Increment "no user free" refcount for a region. + * Calling this function will prevent the region to be shrunk by parts of kbase that + * don't own the region (as long as the refcount stays above zero). Refer to + * kbase_va_region_is_no_user_free() for more information. 
+ * + * @kctx: Pointer to kbase context. + * @region: Pointer to region (not shrinkable). + * + * Return: the pointer to the region passed as argument. + */ +static inline struct kbase_va_region * +kbase_va_region_no_user_free_get(struct kbase_context *kctx, struct kbase_va_region *region) +{ + lockdep_assert_held(&kctx->reg_lock); + + WARN_ON(kbase_is_region_shrinkable(region)); + WARN_ON(region->no_user_free_refcnt == INT_MAX); + + /* non-atomic as kctx->reg_lock is held */ + region->no_user_free_refcnt++; + + return region; +} + +/** + * kbase_va_region_no_user_free_put - Decrement "no user free" refcount for a region. + * + * @kctx: Pointer to kbase context. + * @region: Pointer to region (not shrinkable). + */ +static inline void kbase_va_region_no_user_free_put(struct kbase_context *kctx, + struct kbase_va_region *region) +{ + lockdep_assert_held(&kctx->reg_lock); + + WARN_ON(!kbase_va_region_is_no_user_free(kctx, region)); + + /* non-atomic as kctx->reg_lock is held */ + region->no_user_free_refcnt--; +} + /* Common functions */ static inline struct tagged_addr *kbase_get_cpu_phy_pages( struct kbase_va_region *reg) diff --git a/drivers/gpu/arm/bifrost/mali_kbase_mem_linux.c b/drivers/gpu/arm/bifrost/mali_kbase_mem_linux.c index 9899cef317ac..998849fa4cc2 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_mem_linux.c +++ b/drivers/gpu/arm/bifrost/mali_kbase_mem_linux.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2010-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2023 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -36,6 +36,7 @@ #include #include #include +#include #include #include @@ -791,7 +792,11 @@ int kbase_mem_evictable_init(struct kbase_context *kctx) * struct shrinker does not define batch */ kctx->reclaim.batch = 0; +#if KERNEL_VERSION(6, 0, 0) > LINUX_VERSION_CODE register_shrinker(&kctx->reclaim); +#else + register_shrinker(&kctx->reclaim, "mali-mem"); +#endif return 0; } @@ -855,6 +860,9 @@ int kbase_mem_evictable_make(struct kbase_mem_phy_alloc *gpu_alloc) lockdep_assert_held(&kctx->reg_lock); + /* Memory is in the process of transitioning to the shrinker, and + * should ignore migration attempts + */ kbase_mem_shrink_cpu_mapping(kctx, gpu_alloc->reg, 0, gpu_alloc->nents); @@ -862,12 +870,17 @@ int kbase_mem_evictable_make(struct kbase_mem_phy_alloc *gpu_alloc) /* This allocation can't already be on a list. */ WARN_ON(!list_empty(&gpu_alloc->evict_node)); - /* - * Add the allocation to the eviction list, after this point the shrink + /* Add the allocation to the eviction list, after this point the shrink * can reclaim it. */ list_add(&gpu_alloc->evict_node, &kctx->evict_list); atomic_add(gpu_alloc->nents, &kctx->evict_nents); + + /* Indicate to page migration that the memory can be reclaimed by the shrinker. 
+ */ + if (kbase_page_migration_enabled) + kbase_set_phy_alloc_page_status(gpu_alloc, NOT_MOVABLE); + mutex_unlock(&kctx->jit_evict_lock); kbase_mem_evictable_mark_reclaim(gpu_alloc); @@ -919,6 +932,15 @@ bool kbase_mem_evictable_unmake(struct kbase_mem_phy_alloc *gpu_alloc) gpu_alloc->evicted, 0, mmu_sync_info); gpu_alloc->evicted = 0; + + /* Since the allocation is no longer evictable, and we ensure that + * it grows back to its pre-eviction size, we will consider the + * state of it to be ALLOCATED_MAPPED, as that is the only state + * in which a physical allocation could transition to NOT_MOVABLE + * from. + */ + if (kbase_page_migration_enabled) + kbase_set_phy_alloc_page_status(gpu_alloc, ALLOCATED_MAPPED); } } @@ -977,7 +999,7 @@ int kbase_mem_flags_change(struct kbase_context *kctx, u64 gpu_addr, unsigned in * & GPU queue ringbuffer and none of them needs to be explicitly marked * as evictable by Userspace. */ - if (reg->flags & KBASE_REG_NO_USER_FREE) + if (kbase_va_region_is_no_user_free(kctx, reg)) goto out_unlock; /* Is the region being transitioning between not needed and needed? 
*/ @@ -1302,9 +1324,8 @@ int kbase_mem_umm_map(struct kbase_context *kctx, err = kbase_mmu_insert_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn, kbase_get_gpu_phy_pages(reg), - kbase_reg_current_backed_size(reg), - reg->flags & gwt_mask, kctx->as_nr, - alloc->group_id, mmu_sync_info); + kbase_reg_current_backed_size(reg), reg->flags & gwt_mask, + kctx->as_nr, alloc->group_id, mmu_sync_info, NULL, true); if (err) goto bad_insert; @@ -1330,7 +1351,7 @@ int kbase_mem_umm_map(struct kbase_context *kctx, bad_pad_insert: kbase_mmu_teardown_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn, alloc->pages, - alloc->nents, kctx->as_nr); + alloc->nents, kctx->as_nr, true); bad_insert: kbase_mem_umm_unmap_attachment(kctx, alloc); bad_map_attachment: @@ -1359,7 +1380,7 @@ void kbase_mem_umm_unmap(struct kbase_context *kctx, int err; err = kbase_mmu_teardown_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn, - alloc->pages, reg->nr_pages, kctx->as_nr); + alloc->pages, reg->nr_pages, kctx->as_nr, true); WARN_ON(err); } @@ -1559,10 +1580,10 @@ static struct kbase_va_region *kbase_mem_from_user_buffer( int zone = KBASE_REG_ZONE_CUSTOM_VA; bool shared_zone = false; u32 cache_line_alignment = kbase_get_cache_line_alignment(kctx->kbdev); - unsigned long offset_within_page; - unsigned long remaining_size; struct kbase_alloc_import_user_buf *user_buf; struct page **pages = NULL; + struct tagged_addr *pa; + struct device *dev; int write; /* Flag supported only for dma-buf imported memory */ @@ -1704,20 +1725,33 @@ static struct kbase_va_region *kbase_mem_from_user_buffer( reg->gpu_alloc->nents = 0; reg->extension = 0; - if (pages) { - struct device *dev = kctx->kbdev->dev; - struct tagged_addr *pa = kbase_get_gpu_phy_pages(reg); + pa = kbase_get_gpu_phy_pages(reg); + dev = kctx->kbdev->dev; + if (pages) { /* Top bit signifies that this was pinned on import */ user_buf->current_mapping_usage_count |= PINNED_ON_IMPORT; - offset_within_page = user_buf->address & ~PAGE_MASK; - remaining_size = 
user_buf->size; + /* Manual CPU cache synchronization. + * + * The driver disables automatic CPU cache synchronization because the + * memory pages that enclose the imported region may also contain + * sub-regions which are not imported and that are allocated and used + * by the user process. This may be the case of memory at the beginning + * of the first page and at the end of the last page. Automatic CPU cache + * synchronization would force some operations on those memory allocations, + * unbeknown to the user process: in particular, a CPU cache invalidate + * upon unmapping would destroy the content of dirty CPU caches and cause + * the user process to lose CPU writes to the non-imported sub-regions. + * + * When the GPU claims ownership of the imported memory buffer, it shall + * commit CPU writes for the whole of all pages that enclose the imported + * region, otherwise the initial content of memory would be wrong. + */ for (i = 0; i < faulted_pages; i++) { - unsigned long map_size = - MIN(PAGE_SIZE - offset_within_page, remaining_size); - dma_addr_t dma_addr = dma_map_page(dev, pages[i], - offset_within_page, map_size, DMA_BIDIRECTIONAL); + dma_addr_t dma_addr = + dma_map_page_attrs(dev, pages[i], 0, PAGE_SIZE, DMA_BIDIRECTIONAL, + DMA_ATTR_SKIP_CPU_SYNC); if (dma_mapping_error(dev, dma_addr)) goto unwind_dma_map; @@ -1725,8 +1759,7 @@ static struct kbase_va_region *kbase_mem_from_user_buffer( user_buf->dma_addrs[i] = dma_addr; pa[i] = as_tagged(page_to_phys(pages[i])); - remaining_size -= map_size; - offset_within_page = 0; + dma_sync_single_for_device(dev, dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL); } reg->gpu_alloc->nents = faulted_pages; @@ -1735,19 +1768,19 @@ static struct kbase_va_region *kbase_mem_from_user_buffer( return reg; unwind_dma_map: - offset_within_page = user_buf->address & ~PAGE_MASK; - remaining_size = user_buf->size; dma_mapped_pages = i; - /* Run the unmap loop in the same order as map loop */ + /* Run the unmap loop in the same order as 
map loop, and perform again + * CPU cache synchronization to re-write the content of dirty CPU caches + * to memory. This precautionary measure is kept here to keep this code + * aligned with kbase_jd_user_buf_map() to allow for a potential refactor + * in the future. + */ for (i = 0; i < dma_mapped_pages; i++) { - unsigned long unmap_size = - MIN(PAGE_SIZE - offset_within_page, remaining_size); + dma_addr_t dma_addr = user_buf->dma_addrs[i]; - dma_unmap_page(kctx->kbdev->dev, - user_buf->dma_addrs[i], - unmap_size, DMA_BIDIRECTIONAL); - remaining_size -= unmap_size; - offset_within_page = 0; + dma_sync_single_for_device(dev, dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL); + dma_unmap_page_attrs(dev, dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL, + DMA_ATTR_SKIP_CPU_SYNC); } fault_mismatch: if (pages) { @@ -1767,7 +1800,6 @@ no_alloc_obj: no_region: bad_size: return NULL; - } @@ -1888,9 +1920,9 @@ u64 kbase_mem_alias(struct kbase_context *kctx, u64 *flags, u64 stride, /* validate found region */ if (kbase_is_region_invalid_or_free(aliasing_reg)) goto bad_handle; /* Not found/already free */ - if (aliasing_reg->flags & KBASE_REG_DONT_NEED) + if (kbase_is_region_shrinkable(aliasing_reg)) goto bad_handle; /* Ephemeral region */ - if (aliasing_reg->flags & KBASE_REG_NO_USER_FREE) + if (kbase_va_region_is_no_user_free(kctx, aliasing_reg)) goto bad_handle; /* JIT regions can't be * aliased. 
NO_USER_FREE flag * covers the entire lifetime @@ -2050,7 +2082,10 @@ int kbase_mem_import(struct kbase_context *kctx, enum base_mem_import_type type, /* Remove COHERENT_SYSTEM flag if coherent mem is unavailable */ *flags &= ~BASE_MEM_COHERENT_SYSTEM; } - + if (((*flags & BASE_MEM_CACHED_CPU) == 0) && (type == BASE_MEM_IMPORT_TYPE_USER_BUFFER)) { + dev_warn(kctx->kbdev->dev, "USER_BUFFER must be CPU cached"); + goto bad_flags; + } if ((padding != 0) && (type != BASE_MEM_IMPORT_TYPE_UMM)) { dev_warn(kctx->kbdev->dev, "padding is only supported for UMM"); @@ -2164,11 +2199,9 @@ int kbase_mem_grow_gpu_mapping(struct kbase_context *kctx, /* Map the new pages into the GPU */ phy_pages = kbase_get_gpu_phy_pages(reg); - ret = kbase_mmu_insert_pages(kctx->kbdev, &kctx->mmu, - reg->start_pfn + old_pages, - phy_pages + old_pages, delta, reg->flags, - kctx->as_nr, reg->gpu_alloc->group_id, - mmu_sync_info); + ret = kbase_mmu_insert_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn + old_pages, + phy_pages + old_pages, delta, reg->flags, kctx->as_nr, + reg->gpu_alloc->group_id, mmu_sync_info, reg, false); return ret; } @@ -2197,7 +2230,7 @@ int kbase_mem_shrink_gpu_mapping(struct kbase_context *const kctx, int ret = 0; ret = kbase_mmu_teardown_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn + new_pages, - alloc->pages + new_pages, delta, kctx->as_nr); + alloc->pages + new_pages, delta, kctx->as_nr, false); return ret; } @@ -2262,10 +2295,10 @@ int kbase_mem_commit(struct kbase_context *kctx, u64 gpu_addr, u64 new_pages) if (atomic_read(&reg->cpu_alloc->kernel_mappings) > 0) goto out_unlock; - if (reg->flags & KBASE_REG_DONT_NEED) + if (kbase_is_region_shrinkable(reg)) goto out_unlock; - if (reg->flags & KBASE_REG_NO_USER_FREE) + if (kbase_va_region_is_no_user_free(kctx, reg)) goto out_unlock; #ifdef CONFIG_MALI_MEMORY_FULLY_BACKED @@ -2662,6 +2695,8 @@ static int kbase_mmu_dump_mmap(struct kbase_context *kctx, size_t size; int err = 0; + lockdep_assert_held(&kctx->reg_lock); +
dev_dbg(kctx->kbdev->dev, "%s\n", __func__); size = (vma->vm_end - vma->vm_start); nr_pages = size >> PAGE_SHIFT; @@ -2734,7 +2769,7 @@ static int kbasep_reg_mmap(struct kbase_context *kctx, size_t *nr_pages, size_t *aligned_offset) { - int cookie = vma->vm_pgoff - PFN_DOWN(BASE_MEM_COOKIE_BASE); + unsigned int cookie = vma->vm_pgoff - PFN_DOWN(BASE_MEM_COOKIE_BASE); struct kbase_va_region *reg; int err = 0; @@ -2775,7 +2810,6 @@ static int kbasep_reg_mmap(struct kbase_context *kctx, /* adjust down nr_pages to what we have physically */ *nr_pages = kbase_reg_current_backed_size(reg); - if (kbase_gpu_mmap(kctx, reg, vma->vm_start + *aligned_offset, reg->nr_pages, 1, mmu_sync_info) != 0) { dev_err(kctx->kbdev->dev, "%s:%d\n", __FILE__, __LINE__); @@ -3016,6 +3050,99 @@ void kbase_sync_mem_regions(struct kbase_context *kctx, } } +/** + * kbase_vmap_phy_pages_migrate_count_increment - Increment VMAP count for + * array of physical pages + * + * @pages: Array of pages. + * @page_count: Number of pages. + * @flags: Region flags. + * + * This function is supposed to be called only if page migration support + * is enabled in the driver. + * + * The counter of kernel CPU mappings of the physical pages involved in a + * mapping operation is incremented by 1. Errors are handled by making pages + * not movable. Permanent kernel mappings will be marked as not movable, too. + */ +static void kbase_vmap_phy_pages_migrate_count_increment(struct tagged_addr *pages, + size_t page_count, unsigned long flags) +{ + size_t i; + + for (i = 0; i < page_count; i++) { + struct page *p = as_page(pages[i]); + struct kbase_page_metadata *page_md = kbase_page_private(p); + + /* Skip the 4KB page that is part of a large page, as the large page is + * excluded from the migration process. 
+ */ + if (is_huge(pages[i]) || is_partial(pages[i])) + continue; + + spin_lock(&page_md->migrate_lock); + /* Mark permanent kernel mappings as NOT_MOVABLE because they're likely + * to stay mapped for a long time. However, keep on counting the number + * of mappings even for them: they don't represent an exception for the + * vmap_count. + * + * At the same time, errors need to be handled if a client tries to add + * too many mappings, hence a page may end up in the NOT_MOVABLE state + * anyway even if it's not a permanent kernel mapping. + */ + if (flags & KBASE_REG_PERMANENT_KERNEL_MAPPING) + page_md->status = PAGE_STATUS_SET(page_md->status, (u8)NOT_MOVABLE); + if (page_md->vmap_count < U8_MAX) + page_md->vmap_count++; + else + page_md->status = PAGE_STATUS_SET(page_md->status, (u8)NOT_MOVABLE); + spin_unlock(&page_md->migrate_lock); + } +} + +/** + * kbase_vunmap_phy_pages_migrate_count_decrement - Decrement VMAP count for + * array of physical pages + * + * @pages: Array of pages. + * @page_count: Number of pages. + * + * This function is supposed to be called only if page migration support + * is enabled in the driver. + * + * The counter of kernel CPU mappings of the physical pages involved in a + * mapping operation is decremented by 1. Errors are handled by making pages + * not movable. + */ +static void kbase_vunmap_phy_pages_migrate_count_decrement(struct tagged_addr *pages, + size_t page_count) +{ + size_t i; + + for (i = 0; i < page_count; i++) { + struct page *p = as_page(pages[i]); + struct kbase_page_metadata *page_md = kbase_page_private(p); + + /* Skip the 4KB page that is part of a large page, as the large page is + * excluded from the migration process. + */ + if (is_huge(pages[i]) || is_partial(pages[i])) + continue; + + spin_lock(&page_md->migrate_lock); + /* Decrement the number of mappings for all kinds of pages, including + * pages which are NOT_MOVABLE (e.g. permanent kernel mappings). 
+ * However, errors still need to be handled if a client tries to remove + * more mappings than created. + */ + if (page_md->vmap_count == 0) + page_md->status = PAGE_STATUS_SET(page_md->status, (u8)NOT_MOVABLE); + else + page_md->vmap_count--; + spin_unlock(&page_md->migrate_lock); + } +} + static int kbase_vmap_phy_pages(struct kbase_context *kctx, struct kbase_va_region *reg, u64 offset_bytes, size_t size, struct kbase_vmap_struct *map, kbase_vmap_flag vmap_flags) @@ -3088,6 +3215,13 @@ static int kbase_vmap_phy_pages(struct kbase_context *kctx, struct kbase_va_regi */ cpu_addr = vmap(pages, page_count, VM_MAP, prot); + /* If page migration is enabled, increment the number of VMA mappings + * of all physical pages. In case of errors, e.g. too many mappings, + * make the page not movable to prevent trouble. + */ + if (kbase_page_migration_enabled && !kbase_mem_is_imported(reg->gpu_alloc->type)) + kbase_vmap_phy_pages_migrate_count_increment(page_array, page_count, reg->flags); + kfree(pages); if (!cpu_addr) @@ -3111,6 +3245,7 @@ static int kbase_vmap_phy_pages(struct kbase_context *kctx, struct kbase_va_regi atomic_add(page_count, &kctx->permanent_mapped_pages); kbase_mem_phy_alloc_kernel_mapped(reg->cpu_alloc); + return 0; } @@ -3162,6 +3297,9 @@ void *kbase_vmap_prot(struct kbase_context *kctx, u64 gpu_addr, size_t size, if (kbase_is_region_invalid_or_free(reg)) goto out_unlock; + if (reg->gpu_alloc->type != KBASE_MEM_TYPE_NATIVE) + goto out_unlock; + addr = kbase_vmap_reg(kctx, reg, gpu_addr, size, prot_request, map, 0u); out_unlock: @@ -3189,6 +3327,17 @@ static void kbase_vunmap_phy_pages(struct kbase_context *kctx, vunmap(addr); + /* If page migration is enabled, decrement the number of VMA mappings + * for all physical pages. Now is a good time to do it because references + * haven't been released yet. 
+ */ + if (kbase_page_migration_enabled && !kbase_mem_is_imported(map->gpu_alloc->type)) { + const size_t page_count = PFN_UP(map->offset_in_page + map->size); + struct tagged_addr *pages_array = map->cpu_pages; + + kbase_vunmap_phy_pages_migrate_count_decrement(pages_array, page_count); + } + if (map->flags & KBASE_VMAP_FLAG_SYNC_NEEDED) kbase_sync_mem_regions(kctx, map, KBASE_SYNC_TO_DEVICE); if (map->flags & KBASE_VMAP_FLAG_PERMANENT_MAP_ACCOUNTING) { diff --git a/drivers/gpu/arm/bifrost/mali_kbase_mem_linux.h b/drivers/gpu/arm/bifrost/mali_kbase_mem_linux.h index 5b12e181bf4c..6dda44b9f128 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_mem_linux.h +++ b/drivers/gpu/arm/bifrost/mali_kbase_mem_linux.h @@ -284,7 +284,7 @@ int kbase_mem_shrink_gpu_mapping(struct kbase_context *kctx, struct kbase_va_reg * have been released in the mean time. * * Or, it must have been refcounted with a call to kbase_va_region_alloc_get(), and the region * lock is now held again. - * * Or, @reg has had KBASE_REG_NO_USER_FREE set at creation time or under the region lock, and the + * * Or, @reg has had NO_USER_FREE set at creation time or under the region lock, and the * region lock is now held again. * * The acceptable @vmap_flags are those in %KBASE_VMAP_INPUT_FLAGS. diff --git a/drivers/gpu/arm/bifrost/mali_kbase_mem_migrate.c b/drivers/gpu/arm/bifrost/mali_kbase_mem_migrate.c index 8526688b7b12..737f7da5595d 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_mem_migrate.c +++ b/drivers/gpu/arm/bifrost/mali_kbase_mem_migrate.c @@ -22,11 +22,11 @@ /** * DOC: Base kernel page migration implementation. */ - #include #include #include +#include /* Global integer used to determine if module parameter value has been * provided and if page migration feature is enabled. 
@@ -36,7 +36,12 @@ int kbase_page_migration_enabled; module_param(kbase_page_migration_enabled, int, 0444); KBASE_EXPORT_TEST_API(kbase_page_migration_enabled); -bool kbase_alloc_page_metadata(struct kbase_device *kbdev, struct page *p, dma_addr_t dma_addr) +#if (KERNEL_VERSION(6, 0, 0) <= LINUX_VERSION_CODE) +static const struct movable_operations movable_ops; +#endif + +bool kbase_alloc_page_metadata(struct kbase_device *kbdev, struct page *p, dma_addr_t dma_addr, + u8 group_id) { struct kbase_page_metadata *page_md = kzalloc(sizeof(struct kbase_page_metadata), GFP_KERNEL); @@ -48,19 +53,43 @@ bool kbase_alloc_page_metadata(struct kbase_device *kbdev, struct page *p, dma_a set_page_private(p, (unsigned long)page_md); page_md->dma_addr = dma_addr; page_md->status = PAGE_STATUS_SET(page_md->status, (u8)ALLOCATE_IN_PROGRESS); + page_md->vmap_count = 0; + page_md->group_id = group_id; spin_lock_init(&page_md->migrate_lock); lock_page(p); - if (kbdev->mem_migrate.mapping) { - __SetPageMovable(p, kbdev->mem_migrate.mapping); +#if (KERNEL_VERSION(6, 0, 0) <= LINUX_VERSION_CODE) + __SetPageMovable(p, &movable_ops); + page_md->status = PAGE_MOVABLE_SET(page_md->status); +#else + /* In some corner cases, the driver may attempt to allocate memory pages + * even before the device file is open and the mapping for address space + * operations is created. In that case, it is impossible to assign address + * space operations to memory pages: simply pretend that they are movable, + * even if they are not. + * + * The page will go through all state transitions but it will never be + * actually considered movable by the kernel. This is due to the fact that + * the page cannot be marked as NOT_MOVABLE upon creation, otherwise the + * memory pool will always refuse to add it to the pool and schedule + * a worker thread to free it later. 
+ * + * Page metadata may seem redundant in this case, but they are not, + * because memory pools expect metadata to be present when page migration + * is enabled and because the pages may always return to memory pools and + * gain the movable property later on in their life cycle. + */ + if (kbdev->mem_migrate.inode && kbdev->mem_migrate.inode->i_mapping) { + __SetPageMovable(p, kbdev->mem_migrate.inode->i_mapping); page_md->status = PAGE_MOVABLE_SET(page_md->status); } +#endif unlock_page(p); return true; } -static void kbase_free_page_metadata(struct kbase_device *kbdev, struct page *p) +static void kbase_free_page_metadata(struct kbase_device *kbdev, struct page *p, u8 *group_id) { struct device *const dev = kbdev->dev; struct kbase_page_metadata *page_md; @@ -70,10 +99,13 @@ static void kbase_free_page_metadata(struct kbase_device *kbdev, struct page *p) if (!page_md) return; + if (group_id) + *group_id = page_md->group_id; dma_addr = kbase_dma_addr(p); dma_unmap_page(dev, dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL); kfree(page_md); + set_page_private(p, 0); ClearPagePrivate(p); } @@ -91,6 +123,7 @@ static void kbase_free_pages_worker(struct work_struct *work) spin_unlock(&mem_migrate->free_pages_lock); list_for_each_entry_safe(p, tmp, &free_list, lru) { + u8 group_id = 0; list_del_init(&p->lru); lock_page(p); @@ -101,8 +134,8 @@ static void kbase_free_pages_worker(struct work_struct *work) } unlock_page(p); - kbase_free_page_metadata(kbdev, p); - __free_pages(p, 0); + kbase_free_page_metadata(kbdev, p, &group_id); + kbdev->mgm_dev->ops.mgm_free_page(kbdev->mgm_dev, group_id, p, 0); } } @@ -115,6 +148,145 @@ void kbase_free_page_later(struct kbase_device *kbdev, struct page *p) spin_unlock(&mem_migrate->free_pages_lock); } +/** + * kbasep_migrate_page_pt_mapped - Migrate a memory page that is mapped + * in a PGD of kbase_mmu_table. 
+ * + * @old_page: Existing PGD page to remove + * @new_page: Destination for migrating the existing PGD page to + * + * Replace an existing PGD page with a new page by migrating its content. More specifically: + * the new page shall replace the existing PGD page in the MMU page table. Before returning, + * the new page shall be set as movable and not isolated, while the old page shall lose + * the movable property. The meta data attached to the PGD page is transferred to the + * new (replacement) page. + * + * Return: 0 on migration success, or -EAGAIN for a later retry. Otherwise it's a failure + * and the migration is aborted. + */ +static int kbasep_migrate_page_pt_mapped(struct page *old_page, struct page *new_page) +{ + struct kbase_page_metadata *page_md = kbase_page_private(old_page); + struct kbase_context *kctx = page_md->data.pt_mapped.mmut->kctx; + struct kbase_device *kbdev = kctx->kbdev; + dma_addr_t old_dma_addr = page_md->dma_addr; + dma_addr_t new_dma_addr; + int ret; + + /* Create a new dma map for the new page */ + new_dma_addr = dma_map_page(kbdev->dev, new_page, 0, PAGE_SIZE, DMA_BIDIRECTIONAL); + if (dma_mapping_error(kbdev->dev, new_dma_addr)) + return -ENOMEM; + + /* Lock context to protect access to the page in physical allocation. + * This blocks the CPU page fault handler from remapping pages. + * Only MCU's mmut is device wide, i.e. no corresponding kctx. 
+ */ + kbase_gpu_vm_lock(kctx); + + ret = kbase_mmu_migrate_page( + as_tagged(page_to_phys(old_page)), as_tagged(page_to_phys(new_page)), old_dma_addr, + new_dma_addr, PGD_VPFN_LEVEL_GET_LEVEL(page_md->data.pt_mapped.pgd_vpfn_level)); + + if (ret == 0) { + dma_unmap_page(kbdev->dev, old_dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL); + __ClearPageMovable(old_page); + page_md->status = PAGE_MOVABLE_CLEAR(page_md->status); + ClearPagePrivate(old_page); + put_page(old_page); + + page_md = kbase_page_private(new_page); +#if (KERNEL_VERSION(6, 0, 0) <= LINUX_VERSION_CODE) + __SetPageMovable(new_page, &movable_ops); + page_md->status = PAGE_MOVABLE_SET(page_md->status); +#else + if (kbdev->mem_migrate.inode->i_mapping) { + __SetPageMovable(new_page, kbdev->mem_migrate.inode->i_mapping); + page_md->status = PAGE_MOVABLE_SET(page_md->status); + } +#endif + SetPagePrivate(new_page); + get_page(new_page); + } else + dma_unmap_page(kbdev->dev, new_dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL); + + /* Page fault handler for CPU mapping unblocked. */ + kbase_gpu_vm_unlock(kctx); + + return ret; +} + +/* + * kbasep_migrate_page_allocated_mapped - Migrate a memory page that is both + * allocated and mapped. + * + * @old_page: Page to remove. + * @new_page: Page to add. + * + * Replace an old page with a new page by migrating its content and all its + * CPU and GPU mappings. More specifically: the new page shall replace the + * old page in the MMU page table, as well as in the page array of the physical + * allocation, which is used to create CPU mappings. Before returning, the new + * page shall be set as movable and not isolated, while the old page shall lose + * the movable property. 
+ */ +static int kbasep_migrate_page_allocated_mapped(struct page *old_page, struct page *new_page) +{ + struct kbase_page_metadata *page_md = kbase_page_private(old_page); + struct kbase_context *kctx = page_md->data.mapped.mmut->kctx; + dma_addr_t old_dma_addr, new_dma_addr; + int ret; + + old_dma_addr = page_md->dma_addr; + new_dma_addr = dma_map_page(kctx->kbdev->dev, new_page, 0, PAGE_SIZE, DMA_BIDIRECTIONAL); + if (dma_mapping_error(kctx->kbdev->dev, new_dma_addr)) + return -ENOMEM; + + /* Lock context to protect access to array of pages in physical allocation. + * This blocks the CPU page fault handler from remapping pages. + */ + kbase_gpu_vm_lock(kctx); + + /* Unmap the old physical range. */ + unmap_mapping_range(kctx->filp->f_inode->i_mapping, page_md->data.mapped.vpfn << PAGE_SHIFT, + PAGE_SIZE, 1); + + ret = kbase_mmu_migrate_page(as_tagged(page_to_phys(old_page)), + as_tagged(page_to_phys(new_page)), old_dma_addr, new_dma_addr, + MIDGARD_MMU_BOTTOMLEVEL); + + if (ret == 0) { + dma_unmap_page(kctx->kbdev->dev, old_dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL); + + SetPagePrivate(new_page); + get_page(new_page); + + /* Clear PG_movable from the old page and release reference. */ + ClearPagePrivate(old_page); + __ClearPageMovable(old_page); + page_md->status = PAGE_MOVABLE_CLEAR(page_md->status); + put_page(old_page); + + page_md = kbase_page_private(new_page); + /* Set PG_movable to the new page. */ +#if (KERNEL_VERSION(6, 0, 0) <= LINUX_VERSION_CODE) + __SetPageMovable(new_page, &movable_ops); + page_md->status = PAGE_MOVABLE_SET(page_md->status); +#else + if (kctx->kbdev->mem_migrate.inode->i_mapping) { + __SetPageMovable(new_page, kctx->kbdev->mem_migrate.inode->i_mapping); + page_md->status = PAGE_MOVABLE_SET(page_md->status); + } +#endif + } else + dma_unmap_page(kctx->kbdev->dev, new_dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL); + + /* Page fault handler for CPU mapping unblocked. 
*/ + kbase_gpu_vm_unlock(kctx); + + return ret; +} + /** * kbase_page_isolate - Isolate a page for migration. * @@ -133,6 +305,9 @@ static bool kbase_page_isolate(struct page *p, isolate_mode_t mode) CSTD_UNUSED(mode); + if (!page_md || !IS_PAGE_MOVABLE(page_md->status)) + return false; + if (!spin_trylock(&page_md->migrate_lock)) return false; @@ -152,17 +327,29 @@ static bool kbase_page_isolate(struct page *p, isolate_mode_t mode) atomic_inc(&mem_pool->isolation_in_progress_cnt); break; case ALLOCATED_MAPPED: + /* Mark the page into isolated state, but only if it has no + * kernel CPU mappings + */ + if (page_md->vmap_count == 0) + page_md->status = PAGE_ISOLATE_SET(page_md->status, 1); + break; case PT_MAPPED: - /* Only pages in a memory pool can be isolated for now. */ + /* Mark the page into isolated state. */ + page_md->status = PAGE_ISOLATE_SET(page_md->status, 1); break; case SPILL_IN_PROGRESS: case ALLOCATE_IN_PROGRESS: case FREE_IN_PROGRESS: - /* Transitory state: do nothing. */ + break; + case NOT_MOVABLE: + /* Opportunistically clear the movable property for these pages */ + __ClearPageMovable(p); + page_md->status = PAGE_MOVABLE_CLEAR(page_md->status); break; default: /* State should always fall in one of the previous cases! - * Also notice that FREE_ISOLATED_IN_PROGRESS is impossible because + * Also notice that FREE_ISOLATED_IN_PROGRESS or + * FREE_PT_ISOLATED_IN_PROGRESS is impossible because * that state only applies to pages that are already isolated. */ page_md->status = PAGE_ISOLATE_SET(page_md->status, 0); @@ -210,17 +397,31 @@ static bool kbase_page_isolate(struct page *p, isolate_mode_t mode) * * Return: 0 on success, error code otherwise. 
*/ +#if (KERNEL_VERSION(6, 0, 0) > LINUX_VERSION_CODE) static int kbase_page_migrate(struct address_space *mapping, struct page *new_page, struct page *old_page, enum migrate_mode mode) +#else +static int kbase_page_migrate(struct page *new_page, struct page *old_page, enum migrate_mode mode) +#endif { int err = 0; bool status_mem_pool = false; + bool status_free_pt_isolated_in_progress = false; + bool status_free_isolated_in_progress = false; + bool status_pt_mapped = false; + bool status_mapped = false; + bool status_not_movable = false; struct kbase_page_metadata *page_md = kbase_page_private(old_page); - struct kbase_device *kbdev; + struct kbase_device *kbdev = NULL; +#if (KERNEL_VERSION(6, 0, 0) > LINUX_VERSION_CODE) CSTD_UNUSED(mapping); +#endif CSTD_UNUSED(mode); + if (!page_md || !IS_PAGE_MOVABLE(page_md->status)) + return -EINVAL; + if (!spin_trylock(&page_md->migrate_lock)) return -EAGAIN; @@ -235,10 +436,22 @@ static int kbase_page_migrate(struct address_space *mapping, struct page *new_pa kbdev = page_md->data.mem_pool.kbdev; break; case ALLOCATED_MAPPED: + status_mapped = true; + break; case PT_MAPPED: + status_pt_mapped = true; + break; case FREE_ISOLATED_IN_PROGRESS: - case MULTI_MAPPED: - /* So far, only pages in a memory pool can be migrated. */ + status_free_isolated_in_progress = true; + kbdev = page_md->data.free_isolated.kbdev; + break; + case FREE_PT_ISOLATED_IN_PROGRESS: + status_free_pt_isolated_in_progress = true; + kbdev = page_md->data.free_pt_isolated.kbdev; + break; + case NOT_MOVABLE: + status_not_movable = true; + break; default: /* State should always fall in one of the previous cases! 
*/ err = -EAGAIN; @@ -247,18 +460,37 @@ static int kbase_page_migrate(struct address_space *mapping, struct page *new_pa spin_unlock(&page_md->migrate_lock); - if (status_mem_pool) { + if (status_mem_pool || status_free_isolated_in_progress || + status_free_pt_isolated_in_progress) { struct kbase_mem_migrate *mem_migrate = &kbdev->mem_migrate; - kbase_free_page_metadata(kbdev, old_page); + kbase_free_page_metadata(kbdev, old_page, NULL); __ClearPageMovable(old_page); page_md->status = PAGE_MOVABLE_CLEAR(page_md->status); + put_page(old_page); /* Just free new page to avoid lock contention. */ INIT_LIST_HEAD(&new_page->lru); + get_page(new_page); set_page_private(new_page, 0); kbase_free_page_later(kbdev, new_page); queue_work(mem_migrate->free_pages_workq, &mem_migrate->free_pages_work); + } else if (status_not_movable) { + err = -EINVAL; + } else if (status_mapped) { + err = kbasep_migrate_page_allocated_mapped(old_page, new_page); + } else if (status_pt_mapped) { + err = kbasep_migrate_page_pt_mapped(old_page, new_page); + } + + /* While we want to preserve the movability of pages for which we return + * EAGAIN, according to the kernel docs, movable pages for which a critical + * error is returned are called putback on, which may not be what we + * expect. 
+ */ + if (err < 0 && err != -EAGAIN) { + __ClearPageMovable(old_page); + page_md->status = PAGE_MOVABLE_CLEAR(page_md->status); } return err; @@ -277,13 +509,23 @@ static int kbase_page_migrate(struct address_space *mapping, struct page *new_pa static void kbase_page_putback(struct page *p) { bool status_mem_pool = false; + bool status_free_isolated_in_progress = false; + bool status_free_pt_isolated_in_progress = false; struct kbase_page_metadata *page_md = kbase_page_private(p); - struct kbase_device *kbdev; + struct kbase_device *kbdev = NULL; + + /* If we don't have page metadata, the page may not belong to the + * driver or may already have been freed, and there's nothing we can do + */ + if (!page_md) + return; spin_lock(&page_md->migrate_lock); - /* Page must have been isolated to reach here but metadata is incorrect. */ - WARN_ON(!IS_PAGE_ISOLATED(page_md->status)); + if (WARN_ON(!IS_PAGE_ISOLATED(page_md->status))) { + spin_unlock(&page_md->migrate_lock); + return; + } switch (PAGE_STATUS_GET(page_md->status)) { case MEM_POOL: @@ -291,11 +533,22 @@ static void kbase_page_putback(struct page *p) kbdev = page_md->data.mem_pool.kbdev; break; case ALLOCATED_MAPPED: + page_md->status = PAGE_ISOLATE_SET(page_md->status, 0); + break; case PT_MAPPED: - case FREE_ISOLATED_IN_PROGRESS: - /* Only pages in a memory pool can be isolated for now. - * Therefore only pages in a memory pool can be 'putback'. + case NOT_MOVABLE: + /* Pages should no longer be isolated if they are in a stable state + * and used by the driver. */ + page_md->status = PAGE_ISOLATE_SET(page_md->status, 0); + break; + case FREE_ISOLATED_IN_PROGRESS: + status_free_isolated_in_progress = true; + kbdev = page_md->data.free_isolated.kbdev; + break; + case FREE_PT_ISOLATED_IN_PROGRESS: + status_free_pt_isolated_in_progress = true; + kbdev = page_md->data.free_pt_isolated.kbdev; break; default: /* State should always fall in one of the previous cases! 
*/ @@ -304,35 +557,59 @@ static void kbase_page_putback(struct page *p) spin_unlock(&page_md->migrate_lock); - /* If page was in a memory pool then just free it to avoid lock contention. */ - if (!WARN_ON(!status_mem_pool)) { - struct kbase_mem_migrate *mem_migrate = &kbdev->mem_migrate; - + /* If page was in a memory pool then just free it to avoid lock contention. The + * same is also true to status_free_pt_isolated_in_progress. + */ + if (status_mem_pool || status_free_isolated_in_progress || + status_free_pt_isolated_in_progress) { __ClearPageMovable(p); page_md->status = PAGE_MOVABLE_CLEAR(page_md->status); - list_del_init(&p->lru); - kbase_free_page_later(kbdev, p); - queue_work(mem_migrate->free_pages_workq, &mem_migrate->free_pages_work); + + if (!WARN_ON_ONCE(!kbdev)) { + struct kbase_mem_migrate *mem_migrate = &kbdev->mem_migrate; + + kbase_free_page_later(kbdev, p); + queue_work(mem_migrate->free_pages_workq, &mem_migrate->free_pages_work); + } } } +#if (KERNEL_VERSION(6, 0, 0) <= LINUX_VERSION_CODE) +static const struct movable_operations movable_ops = { + .isolate_page = kbase_page_isolate, + .migrate_page = kbase_page_migrate, + .putback_page = kbase_page_putback, +}; +#else static const struct address_space_operations kbase_address_space_ops = { .isolate_page = kbase_page_isolate, .migratepage = kbase_page_migrate, .putback_page = kbase_page_putback, }; +#endif +#if (KERNEL_VERSION(6, 0, 0) > LINUX_VERSION_CODE) void kbase_mem_migrate_set_address_space_ops(struct kbase_device *kbdev, struct file *const filp) { + mutex_lock(&kbdev->fw_load_lock); + if (filp) { filp->f_inode->i_mapping->a_ops = &kbase_address_space_ops; - if (!kbdev->mem_migrate.mapping) - kbdev->mem_migrate.mapping = filp->f_inode->i_mapping; - else - WARN_ON(kbdev->mem_migrate.mapping != filp->f_inode->i_mapping); + if (!kbdev->mem_migrate.inode) { + kbdev->mem_migrate.inode = filp->f_inode; + /* This reference count increment is balanced by iput() + * upon termination. 
+ */ + atomic_inc(&filp->f_inode->i_count); + } else { + WARN_ON(kbdev->mem_migrate.inode != filp->f_inode); + } } + + mutex_unlock(&kbdev->fw_load_lock); } +#endif void kbase_mem_migrate_init(struct kbase_device *kbdev) { @@ -344,6 +621,9 @@ void kbase_mem_migrate_init(struct kbase_device *kbdev) spin_lock_init(&mem_migrate->free_pages_lock); INIT_LIST_HEAD(&mem_migrate->free_pages_list); +#if (KERNEL_VERSION(6, 0, 0) > LINUX_VERSION_CODE) + mem_migrate->inode = NULL; +#endif mem_migrate->free_pages_workq = alloc_workqueue("free_pages_workq", WQ_UNBOUND | WQ_MEM_RECLAIM, 1); INIT_WORK(&mem_migrate->free_pages_work, kbase_free_pages_worker); @@ -355,4 +635,7 @@ void kbase_mem_migrate_term(struct kbase_device *kbdev) if (mem_migrate->free_pages_workq) destroy_workqueue(mem_migrate->free_pages_workq); +#if (KERNEL_VERSION(6, 0, 0) > LINUX_VERSION_CODE) + iput(mem_migrate->inode); +#endif } diff --git a/drivers/gpu/arm/bifrost/mali_kbase_mem_migrate.h b/drivers/gpu/arm/bifrost/mali_kbase_mem_migrate.h index d4796327b8d7..76bbc999e110 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_mem_migrate.h +++ b/drivers/gpu/arm/bifrost/mali_kbase_mem_migrate.h @@ -50,6 +50,8 @@ extern int kbase_page_migration_enabled; * @kbdev: Pointer to kbase device. * @p: Page to assign metadata to. * @dma_addr: DMA address mapped to paged. + * @group_id: Memory group ID associated with the entity that is + * allocating the page metadata. * * This will allocate memory for the page's metadata, initialize it and * assign a reference to the page's private field. Importantly, once @@ -58,7 +60,8 @@ extern int kbase_page_migration_enabled; * * Return: true if successful or false otherwise. */ -bool kbase_alloc_page_metadata(struct kbase_device *kbdev, struct page *p, dma_addr_t dma_addr); +bool kbase_alloc_page_metadata(struct kbase_device *kbdev, struct page *p, dma_addr_t dma_addr, + u8 group_id); /** * kbase_free_page_later - Defer freeing of given page. 
@@ -70,6 +73,7 @@ bool kbase_alloc_page_metadata(struct kbase_device *kbdev, struct page *p, dma_a */ void kbase_free_page_later(struct kbase_device *kbdev, struct page *p); +#if (KERNEL_VERSION(6, 0, 0) > LINUX_VERSION_CODE) /* * kbase_mem_migrate_set_address_space_ops - Set address space operations * @@ -81,6 +85,7 @@ void kbase_free_page_later(struct kbase_device *kbdev, struct page *p); * add a reference to @kbdev. */ void kbase_mem_migrate_set_address_space_ops(struct kbase_device *kbdev, struct file *const filp); +#endif /* * kbase_mem_migrate_init - Initialise kbase page migration diff --git a/drivers/gpu/arm/bifrost/mali_kbase_mem_pool.c b/drivers/gpu/arm/bifrost/mali_kbase_mem_pool.c index dce066db7385..75569cc51c52 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_mem_pool.c +++ b/drivers/gpu/arm/bifrost/mali_kbase_mem_pool.c @@ -57,37 +57,59 @@ static bool kbase_mem_pool_is_empty(struct kbase_mem_pool *pool) return kbase_mem_pool_size(pool) == 0; } -static void set_pool_new_page_metadata(struct kbase_mem_pool *pool, struct page *p, +static bool set_pool_new_page_metadata(struct kbase_mem_pool *pool, struct page *p, struct list_head *page_list, size_t *list_size) { struct kbase_page_metadata *page_md = kbase_page_private(p); + bool not_movable = false; lockdep_assert_held(&pool->pool_lock); + /* Free the page instead of adding it to the pool if it's not movable. + * Only update page status and add the page to the memory pool if + * it is not isolated. 
+ */ spin_lock(&page_md->migrate_lock); - /* Only update page status and add the page to the memory pool if it is not isolated */ - if (!WARN_ON(IS_PAGE_ISOLATED(page_md->status))) { + if (PAGE_STATUS_GET(page_md->status) == (u8)NOT_MOVABLE) { + not_movable = true; + } else if (!WARN_ON_ONCE(IS_PAGE_ISOLATED(page_md->status))) { page_md->status = PAGE_STATUS_SET(page_md->status, (u8)MEM_POOL); page_md->data.mem_pool.pool = pool; page_md->data.mem_pool.kbdev = pool->kbdev; - list_move(&p->lru, page_list); + list_add(&p->lru, page_list); (*list_size)++; } spin_unlock(&page_md->migrate_lock); + + if (not_movable) { + kbase_free_page_later(pool->kbdev, p); + pool_dbg(pool, "skipping a not movable page\n"); + } + + return not_movable; } static void kbase_mem_pool_add_locked(struct kbase_mem_pool *pool, struct page *p) { + bool queue_work_to_free = false; + lockdep_assert_held(&pool->pool_lock); - if (!pool->order && kbase_page_migration_enabled) - set_pool_new_page_metadata(pool, p, &pool->page_list, &pool->cur_size); - else { + if (!pool->order && kbase_page_migration_enabled) { + if (set_pool_new_page_metadata(pool, p, &pool->page_list, &pool->cur_size)) + queue_work_to_free = true; + } else { list_add(&p->lru, &pool->page_list); pool->cur_size++; } + if (queue_work_to_free) { + struct kbase_mem_migrate *mem_migrate = &pool->kbdev->mem_migrate; + + queue_work(mem_migrate->free_pages_workq, &mem_migrate->free_pages_work); + } + pool_dbg(pool, "added page\n"); } @@ -101,18 +123,29 @@ static void kbase_mem_pool_add(struct kbase_mem_pool *pool, struct page *p) static void kbase_mem_pool_add_list_locked(struct kbase_mem_pool *pool, struct list_head *page_list, size_t nr_pages) { + bool queue_work_to_free = false; + lockdep_assert_held(&pool->pool_lock); if (!pool->order && kbase_page_migration_enabled) { struct page *p, *tmp; - list_for_each_entry_safe(p, tmp, page_list, lru) - set_pool_new_page_metadata(pool, p, &pool->page_list, &pool->cur_size); + 
list_for_each_entry_safe(p, tmp, page_list, lru) { + list_del_init(&p->lru); + if (set_pool_new_page_metadata(pool, p, &pool->page_list, &pool->cur_size)) + queue_work_to_free = true; + } } else { list_splice(page_list, &pool->page_list); pool->cur_size += nr_pages; } + if (queue_work_to_free) { + struct kbase_mem_migrate *mem_migrate = &pool->kbdev->mem_migrate; + + queue_work(mem_migrate->free_pages_workq, &mem_migrate->free_pages_work); + } + pool_dbg(pool, "added %zu pages\n", nr_pages); } @@ -226,7 +259,7 @@ struct page *kbase_mem_alloc_page(struct kbase_mem_pool *pool) /* Setup page metadata for 4KB pages when page migration is enabled */ if (!pool->order && kbase_page_migration_enabled) { INIT_LIST_HEAD(&p->lru); - if (!kbase_alloc_page_metadata(kbdev, p, dma_addr)) { + if (!kbase_alloc_page_metadata(kbdev, p, dma_addr, pool->group_id)) { dma_unmap_page(dev, dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL); kbdev->mgm_dev->ops.mgm_free_page(kbdev->mgm_dev, pool->group_id, p, pool->order); @@ -251,7 +284,14 @@ static void enqueue_free_pool_pages_work(struct kbase_mem_pool *pool) void kbase_mem_pool_free_page(struct kbase_mem_pool *pool, struct page *p) { - struct kbase_device *kbdev = pool->kbdev; + struct kbase_device *kbdev; + + if (WARN_ON(!pool)) + return; + if (WARN_ON(!p)) + return; + + kbdev = pool->kbdev; if (!pool->order && kbase_page_migration_enabled) { kbase_free_page_later(kbdev, p); @@ -460,7 +500,11 @@ int kbase_mem_pool_init(struct kbase_mem_pool *pool, const struct kbase_mem_pool * struct shrinker does not define batch */ pool->reclaim.batch = 0; +#if KERNEL_VERSION(6, 0, 0) > LINUX_VERSION_CODE register_shrinker(&pool->reclaim); +#else + register_shrinker(&pool->reclaim, "mali-mem-pool"); +#endif pool_dbg(pool, "initialized\n"); @@ -499,14 +543,16 @@ void kbase_mem_pool_term(struct kbase_mem_pool *pool) /* Zero pages first without holding the next_pool lock */ for (i = 0; i < nr_to_spill; i++) { p = kbase_mem_pool_remove_locked(pool, 
SPILL_IN_PROGRESS); - list_add(&p->lru, &spill_list); + if (p) + list_add(&p->lru, &spill_list); } } while (!kbase_mem_pool_is_empty(pool)) { /* Free remaining pages to kernel */ p = kbase_mem_pool_remove_locked(pool, FREE_IN_PROGRESS); - list_add(&p->lru, &free_list); + if (p) + list_add(&p->lru, &free_list); } kbase_mem_pool_unlock(pool); @@ -558,17 +604,10 @@ struct page *kbase_mem_pool_alloc(struct kbase_mem_pool *pool) struct page *kbase_mem_pool_alloc_locked(struct kbase_mem_pool *pool) { - struct page *p; - lockdep_assert_held(&pool->pool_lock); pool_dbg(pool, "alloc_locked()\n"); - p = kbase_mem_pool_remove_locked(pool, ALLOCATE_IN_PROGRESS); - - if (p) - return p; - - return NULL; + return kbase_mem_pool_remove_locked(pool, ALLOCATE_IN_PROGRESS); } void kbase_mem_pool_free(struct kbase_mem_pool *pool, struct page *p, @@ -636,10 +675,12 @@ int kbase_mem_pool_alloc_pages(struct kbase_mem_pool *pool, size_t nr_4k_pages, /* Get pages from this pool */ kbase_mem_pool_lock(pool); nr_from_pool = min(nr_pages_internal, kbase_mem_pool_size(pool)); + while (nr_from_pool--) { int j; p = kbase_mem_pool_remove_locked(pool, ALLOCATE_IN_PROGRESS); + if (pool->order) { pages[i++] = as_tagged_tag(page_to_phys(p), HUGE_HEAD | HUGE_PAGE); @@ -867,7 +908,6 @@ void kbase_mem_pool_free_pages(struct kbase_mem_pool *pool, size_t nr_pages, pages[i] = as_tagged(0); continue; } - p = as_page(pages[i]); kbase_mem_pool_free_page(pool, p); diff --git a/drivers/gpu/arm/bifrost/mali_kbase_softjobs.c b/drivers/gpu/arm/bifrost/mali_kbase_softjobs.c index b0c5126afcbe..212a61f68372 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_softjobs.c +++ b/drivers/gpu/arm/bifrost/mali_kbase_softjobs.c @@ -502,6 +502,7 @@ static void kbasep_soft_event_cancel_job(struct kbase_jd_atom *katom) kbase_js_sched_all(katom->kctx->kbdev); } +#if IS_ENABLED(CONFIG_MALI_VECTOR_DUMP) || MALI_UNIT_TEST static void kbase_debug_copy_finish(struct kbase_jd_atom *katom) { struct kbase_debug_copy_buffer *buffers = 
katom->softjob_data; @@ -673,8 +674,8 @@ static int kbase_debug_copy_prepare(struct kbase_jd_atom *katom) case KBASE_MEM_TYPE_IMPORTED_USER_BUF: { struct kbase_mem_phy_alloc *alloc = reg->gpu_alloc; - unsigned long nr_pages = - alloc->imported.user_buf.nr_pages; + const unsigned long nr_pages = alloc->imported.user_buf.nr_pages; + const unsigned long start = alloc->imported.user_buf.address; if (alloc->imported.user_buf.mm != current->mm) { ret = -EINVAL; @@ -686,11 +687,9 @@ static int kbase_debug_copy_prepare(struct kbase_jd_atom *katom) ret = -ENOMEM; goto out_unlock; } - - ret = get_user_pages_fast( - alloc->imported.user_buf.address, - nr_pages, 0, - buffers[i].extres_pages); + kbase_gpu_vm_unlock(katom->kctx); + ret = get_user_pages_fast(start, nr_pages, 0, buffers[i].extres_pages); + kbase_gpu_vm_lock(katom->kctx); if (ret != nr_pages) { /* Adjust number of pages, so that we only * attempt to release pages in the array that we @@ -728,7 +727,6 @@ out_cleanup: return ret; } -#endif /* !MALI_USE_CSF */ #if KERNEL_VERSION(5, 6, 0) <= LINUX_VERSION_CODE static void *dma_buf_kmap_page(struct kbase_mem_phy_alloc *gpu_alloc, @@ -760,8 +758,18 @@ static void *dma_buf_kmap_page(struct kbase_mem_phy_alloc *gpu_alloc, } #endif -int kbase_mem_copy_from_extres(struct kbase_context *kctx, - struct kbase_debug_copy_buffer *buf_data) +/** + * kbase_mem_copy_from_extres() - Copy from external resources. + * + * @kctx: kbase context within which the copying is to take place. + * @buf_data: Pointer to the information about external resources: + * pages pertaining to the external resource, number of + * pages to copy. + * + * Return: 0 on success, error code otherwise. 
+ */ +static int kbase_mem_copy_from_extres(struct kbase_context *kctx, + struct kbase_debug_copy_buffer *buf_data) { unsigned int i; unsigned int target_page_nr = 0; @@ -848,7 +856,6 @@ out_unlock: return ret; } -#if !MALI_USE_CSF static int kbase_debug_copy(struct kbase_jd_atom *katom) { struct kbase_debug_copy_buffer *buffers = katom->softjob_data; @@ -866,6 +873,7 @@ static int kbase_debug_copy(struct kbase_jd_atom *katom) return 0; } +#endif /* IS_ENABLED(CONFIG_MALI_VECTOR_DUMP) || MALI_UNIT_TEST */ #endif /* !MALI_USE_CSF */ #define KBASEP_JIT_ALLOC_GPU_ADDR_ALIGNMENT ((u32)0x7) @@ -963,11 +971,6 @@ static int kbase_jit_allocate_prepare(struct kbase_jd_atom *katom) ret = -EINVAL; goto free_info; } - /* Clear any remaining bytes when user struct is smaller than - * kernel struct. For jit version 1, this also clears the - * padding bytes - */ - memset(((u8 *)info) + sizeof(*info), 0, sizeof(*info) - sizeof(*info)); ret = kbasep_jit_alloc_validate(kctx, info); if (ret) @@ -1541,6 +1544,7 @@ int kbase_process_soft_job(struct kbase_jd_atom *katom) case BASE_JD_REQ_SOFT_EVENT_RESET: kbasep_soft_event_update_locked(katom, BASE_JD_SOFT_EVENT_RESET); break; +#if IS_ENABLED(CONFIG_MALI_VECTOR_DUMP) || MALI_UNIT_TEST case BASE_JD_REQ_SOFT_DEBUG_COPY: { int res = kbase_debug_copy(katom); @@ -1549,6 +1553,7 @@ int kbase_process_soft_job(struct kbase_jd_atom *katom) katom->event_code = BASE_JD_EVENT_JOB_INVALID; break; } +#endif /* IS_ENABLED(CONFIG_MALI_VECTOR_DUMP) || MALI_UNIT_TEST */ case BASE_JD_REQ_SOFT_JIT_ALLOC: ret = kbase_jit_allocate_process(katom); break; @@ -1654,8 +1659,10 @@ int kbase_prepare_soft_job(struct kbase_jd_atom *katom) if (katom->jc == 0) return -EINVAL; break; +#if IS_ENABLED(CONFIG_MALI_VECTOR_DUMP) || MALI_UNIT_TEST case BASE_JD_REQ_SOFT_DEBUG_COPY: return kbase_debug_copy_prepare(katom); +#endif /* IS_ENABLED(CONFIG_MALI_VECTOR_DUMP) || MALI_UNIT_TEST */ case BASE_JD_REQ_SOFT_EXT_RES_MAP: return kbase_ext_res_prepare(katom); case 
BASE_JD_REQ_SOFT_EXT_RES_UNMAP: @@ -1687,9 +1694,11 @@ void kbase_finish_soft_job(struct kbase_jd_atom *katom) kbase_sync_fence_in_remove(katom); break; #endif /* CONFIG_SYNC_FILE */ +#if IS_ENABLED(CONFIG_MALI_VECTOR_DUMP) || MALI_UNIT_TEST case BASE_JD_REQ_SOFT_DEBUG_COPY: kbase_debug_copy_finish(katom); break; +#endif /* IS_ENABLED(CONFIG_MALI_VECTOR_DUMP) || MALI_UNIT_TEST */ case BASE_JD_REQ_SOFT_JIT_ALLOC: kbase_jit_allocate_finish(katom); break; diff --git a/drivers/gpu/arm/bifrost/mali_kbase_vinstr.c b/drivers/gpu/arm/bifrost/mali_kbase_vinstr.c index 853c89796d44..d770913e9da5 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_vinstr.c +++ b/drivers/gpu/arm/bifrost/mali_kbase_vinstr.c @@ -41,6 +41,11 @@ #include #include +/* Explicitly include epoll header for old kernels. Not required from 4.16. */ +#if KERNEL_VERSION(4, 16, 0) > LINUX_VERSION_CODE +#include +#endif + /* Hwcnt reader API version */ #define HWCNT_READER_API 1 diff --git a/drivers/gpu/arm/bifrost/mmu/backend/mali_kbase_mmu_csf.c b/drivers/gpu/arm/bifrost/mmu/backend/mali_kbase_mmu_csf.c index db2086079c14..4a0926531af2 100644 --- a/drivers/gpu/arm/bifrost/mmu/backend/mali_kbase_mmu_csf.c +++ b/drivers/gpu/arm/bifrost/mmu/backend/mali_kbase_mmu_csf.c @@ -88,12 +88,11 @@ static void submit_work_pagefault(struct kbase_device *kbdev, u32 as_nr, * context's address space, when the page fault occurs for * MCU's address space. 
*/ - if (!queue_work(as->pf_wq, &as->work_pagefault)) - kbase_ctx_sched_release_ctx(kctx); - else { + if (!queue_work(as->pf_wq, &as->work_pagefault)) { dev_dbg(kbdev->dev, - "Page fault is already pending for as %u\n", - as_nr); + "Page fault is already pending for as %u", as_nr); + kbase_ctx_sched_release_ctx(kctx); + } else { atomic_inc(&kbdev->faults_pending); } } @@ -552,14 +551,14 @@ void kbase_mmu_gpu_fault_interrupt(struct kbase_device *kbdev, u32 status, } KBASE_EXPORT_TEST_API(kbase_mmu_gpu_fault_interrupt); -int kbase_mmu_as_init(struct kbase_device *kbdev, int i) +int kbase_mmu_as_init(struct kbase_device *kbdev, unsigned int i) { kbdev->as[i].number = i; kbdev->as[i].bf_data.addr = 0ULL; kbdev->as[i].pf_data.addr = 0ULL; kbdev->as[i].gf_data.addr = 0ULL; - kbdev->as[i].pf_wq = alloc_workqueue("mali_mmu%d", 0, 1, i); + kbdev->as[i].pf_wq = alloc_workqueue("mali_mmu%d", WQ_UNBOUND, 1, i); if (!kbdev->as[i].pf_wq) return -ENOMEM; diff --git a/drivers/gpu/arm/bifrost/mmu/backend/mali_kbase_mmu_jm.c b/drivers/gpu/arm/bifrost/mmu/backend/mali_kbase_mmu_jm.c index 22786f0748ce..83605c3dc56f 100644 --- a/drivers/gpu/arm/bifrost/mmu/backend/mali_kbase_mmu_jm.c +++ b/drivers/gpu/arm/bifrost/mmu/backend/mali_kbase_mmu_jm.c @@ -328,7 +328,7 @@ void kbase_mmu_interrupt(struct kbase_device *kbdev, u32 irq_stat) while (bf_bits | pf_bits) { struct kbase_as *as; - int as_no; + unsigned int as_no; struct kbase_context *kctx; struct kbase_fault *fault; @@ -423,13 +423,13 @@ int kbase_mmu_switch_to_ir(struct kbase_context *const kctx, return kbase_job_slot_softstop_start_rp(kctx, reg); } -int kbase_mmu_as_init(struct kbase_device *kbdev, int i) +int kbase_mmu_as_init(struct kbase_device *kbdev, unsigned int i) { kbdev->as[i].number = i; kbdev->as[i].bf_data.addr = 0ULL; kbdev->as[i].pf_data.addr = 0ULL; - kbdev->as[i].pf_wq = alloc_workqueue("mali_mmu%d", 0, 1, i); + kbdev->as[i].pf_wq = alloc_workqueue("mali_mmu%u", 0, 1, i); if (!kbdev->as[i].pf_wq) return -ENOMEM; diff 
--git a/drivers/gpu/arm/bifrost/mmu/mali_kbase_mmu.c b/drivers/gpu/arm/bifrost/mmu/mali_kbase_mmu.c index 8f261d439909..3131d57ef330 100644 --- a/drivers/gpu/arm/bifrost/mmu/mali_kbase_mmu.c +++ b/drivers/gpu/arm/bifrost/mmu/mali_kbase_mmu.c @@ -25,6 +25,7 @@ #include #include +#include #include #include #include @@ -156,7 +157,7 @@ static void mmu_flush_pa_range(struct kbase_device *kbdev, phys_addr_t phys, siz } else if (op == KBASE_MMU_OP_FLUSH_MEM) { flush_op = GPU_COMMAND_FLUSH_PA_RANGE_CLN_INV_L2_LSC; } else { - dev_warn(kbdev->dev, "Invalid flush request (op = %d)\n", op); + dev_warn(kbdev->dev, "Invalid flush request (op = %d)", op); return; } @@ -167,7 +168,7 @@ static void mmu_flush_pa_range(struct kbase_device *kbdev, phys_addr_t phys, siz * perform a reset to recover */ dev_err(kbdev->dev, - "Flush for physical address range did not complete. Issuing GPU soft-reset to recover\n"); + "Flush for physical address range did not complete. Issuing GPU soft-reset to recover"); if (kbase_prepare_to_reset_gpu(kbdev, RESET_FLAGS_HWC_UNRECOVERABLE_ERROR)) kbase_reset_gpu(kbdev); @@ -230,9 +231,8 @@ static void mmu_flush_invalidate_as(struct kbase_device *kbdev, struct kbase_as */ dev_err(kbdev->dev, "Flush for GPU page table update did not complete. Issuing GPU soft-reset to recover"); - if (kbase_prepare_to_reset_gpu( - kbdev, RESET_FLAGS_HWC_UNRECOVERABLE_ERROR)) - kbase_reset_gpu(kbdev); + if (kbase_prepare_to_reset_gpu_locked(kbdev, RESET_FLAGS_HWC_UNRECOVERABLE_ERROR)) + kbase_reset_gpu_locked(kbdev); } spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); @@ -326,7 +326,7 @@ static void mmu_flush_invalidate_on_gpu_ctrl(struct kbase_device *kbdev, struct * perform a reset to recover. */ dev_err(kbdev->dev, - "Flush for GPU page table update did not complete. Issuing GPU soft-reset to recover\n"); + "Flush for GPU page table update did not complete. 
Issuing GPU soft-reset to recover"); if (kbase_prepare_to_reset_gpu(kbdev, RESET_FLAGS_HWC_UNRECOVERABLE_ERROR)) kbase_reset_gpu(kbdev); @@ -340,15 +340,7 @@ static void kbase_mmu_sync_pgd_gpu(struct kbase_device *kbdev, struct kbase_cont phys_addr_t phys, size_t size, enum kbase_mmu_op_type flush_op) { -#if MALI_USE_CSF - unsigned long irq_flags; - - spin_lock_irqsave(&kbdev->hwaccess_lock, irq_flags); - if (mmu_flush_cache_on_gpu_ctrl(kbdev) && (flush_op != KBASE_MMU_OP_NONE) && - kbdev->pm.backend.gpu_powered && (!kctx || kctx->as_nr >= 0)) - mmu_flush_pa_range(kbdev, phys, size, KBASE_MMU_OP_FLUSH_PT); - spin_unlock_irqrestore(&kbdev->hwaccess_lock, irq_flags); -#endif + kbase_mmu_flush_pa_range(kbdev, kctx, phys, size, flush_op); } static void kbase_mmu_sync_pgd_cpu(struct kbase_device *kbdev, dma_addr_t handle, size_t size) @@ -398,9 +390,9 @@ static void kbase_mmu_sync_pgd(struct kbase_device *kbdev, struct kbase_context * a 4kB physical page. */ -static int kbase_mmu_update_pages_no_flush(struct kbase_context *kctx, u64 vpfn, - struct tagged_addr *phys, size_t nr, unsigned long flags, - int group_id, u64 *dirty_pgds); +static int kbase_mmu_update_pages_no_flush(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, + u64 vpfn, struct tagged_addr *phys, size_t nr, + unsigned long flags, int group_id, u64 *dirty_pgds); /** * kbase_mmu_update_and_free_parent_pgds() - Update number of valid entries and @@ -420,6 +412,65 @@ static void kbase_mmu_update_and_free_parent_pgds(struct kbase_device *kbdev, u64 vpfn, int level, enum kbase_mmu_op_type flush_op, u64 *dirty_pgds, struct list_head *free_pgds_list); + +static void kbase_mmu_account_freed_pgd(struct kbase_device *kbdev, struct kbase_mmu_table *mmut) +{ + atomic_sub(1, &kbdev->memdev.used_pages); + + /* If MMU tables belong to a context then pages will have been accounted + * against it, so we must decrement the usage counts here. 
+ */ + if (mmut->kctx) { + kbase_process_page_usage_dec(mmut->kctx, 1); + atomic_sub(1, &mmut->kctx->used_pages); + } + + kbase_trace_gpu_mem_usage_dec(kbdev, mmut->kctx, 1); +} + +static bool kbase_mmu_handle_isolated_pgd_page(struct kbase_device *kbdev, + struct kbase_mmu_table *mmut, + struct page *p) +{ + struct kbase_page_metadata *page_md = kbase_page_private(p); + bool page_is_isolated = false; + + lockdep_assert_held(&mmut->mmu_lock); + + if (!kbase_page_migration_enabled) + return false; + + spin_lock(&page_md->migrate_lock); + if (PAGE_STATUS_GET(page_md->status) == PT_MAPPED) { + WARN_ON_ONCE(!mmut->kctx); + if (IS_PAGE_ISOLATED(page_md->status)) { + page_md->status = PAGE_STATUS_SET(page_md->status, + FREE_PT_ISOLATED_IN_PROGRESS); + page_md->data.free_pt_isolated.kbdev = kbdev; + page_is_isolated = true; + } else { + page_md->status = + PAGE_STATUS_SET(page_md->status, FREE_IN_PROGRESS); + } + } else { + WARN_ON_ONCE(mmut->kctx); + WARN_ON_ONCE(PAGE_STATUS_GET(page_md->status) != NOT_MOVABLE); + } + spin_unlock(&page_md->migrate_lock); + + if (unlikely(page_is_isolated)) { + /* Do the CPU cache flush and accounting here for the isolated + * PGD page, which is done inside kbase_mmu_free_pgd() for the + * PGD page that did not get isolated. + */ + dma_sync_single_for_device(kbdev->dev, kbase_dma_addr(p), PAGE_SIZE, + DMA_BIDIRECTIONAL); + kbase_mmu_account_freed_pgd(kbdev, mmut); + } + + return page_is_isolated; +} + /** * kbase_mmu_free_pgd() - Free memory of the page directory * @@ -441,17 +492,7 @@ static void kbase_mmu_free_pgd(struct kbase_device *kbdev, struct kbase_mmu_tabl kbase_mem_pool_free(&kbdev->mem_pools.small[mmut->group_id], p, true); - atomic_sub(1, &kbdev->memdev.used_pages); - - /* If MMU tables belong to a context then pages will have been accounted - * against it, so we must decrement the usage counts here. 
- */ - if (mmut->kctx) { - kbase_process_page_usage_dec(mmut->kctx, 1); - atomic_sub(1, &mmut->kctx->used_pages); - } - - kbase_trace_gpu_mem_usage_dec(kbdev, mmut->kctx, 1); + kbase_mmu_account_freed_pgd(kbdev, mmut); } /** @@ -482,6 +523,20 @@ static void kbase_mmu_free_pgds_list(struct kbase_device *kbdev, struct kbase_mm mutex_unlock(&mmut->mmu_lock); } +static void kbase_mmu_add_to_free_pgds_list(struct kbase_device *kbdev, + struct kbase_mmu_table *mmut, + struct page *p, struct list_head *free_pgds_list) +{ + bool page_is_isolated = false; + + lockdep_assert_held(&mmut->mmu_lock); + + page_is_isolated = kbase_mmu_handle_isolated_pgd_page(kbdev, mmut, p); + + if (likely(!page_is_isolated)) + list_add(&p->lru, free_pgds_list); +} + /** * reg_grow_calc_extra_pages() - Calculate the number of backed pages to add to * a region on a GPU page fault @@ -509,7 +564,7 @@ static size_t reg_grow_calc_extra_pages(struct kbase_device *kbdev, if (!multiple) { dev_warn( kbdev->dev, - "VA Region 0x%llx extension was 0, allocator needs to set this properly for KBASE_REG_PF_GROW\n", + "VA Region 0x%llx extension was 0, allocator needs to set this properly for KBASE_REG_PF_GROW", ((unsigned long long)reg->start_pfn) << PAGE_SHIFT); return minimum_extra; } @@ -692,8 +747,8 @@ static void kbase_gpu_mmu_handle_write_fault(struct kbase_context *kctx, } /* Now make this faulting page writable to GPU. 
*/ - kbase_mmu_update_pages_no_flush(kctx, fault_pfn, fault_phys_addr, 1, region->flags, - region->gpu_alloc->group_id, &dirty_pgds); + kbase_mmu_update_pages_no_flush(kbdev, &kctx->mmu, fault_pfn, fault_phys_addr, 1, + region->flags, region->gpu_alloc->group_id, &dirty_pgds); kbase_gpu_mmu_handle_write_faulting_as(kbdev, faulting_as, fault_pfn, 1, kctx->id, dirty_pgds); @@ -917,7 +972,7 @@ static bool page_fault_try_alloc(struct kbase_context *kctx, */ dev_warn( kctx->kbdev->dev, - "Page allocation failure of %zu pages: managed %zu pages, mempool (inc linked pools) had %zu pages available\n", + "Page allocation failure of %zu pages: managed %zu pages, mempool (inc linked pools) had %zu pages available", new_pages, total_gpu_pages_alloced + total_cpu_pages_alloced, total_mempools_free_4k); *pages_to_grow = 0; @@ -985,9 +1040,8 @@ void kbase_mmu_page_fault_worker(struct work_struct *data) as_no = faulting_as->number; kbdev = container_of(faulting_as, struct kbase_device, as[as_no]); - dev_dbg(kbdev->dev, - "Entering %s %pK, fault_pfn %lld, as_no %d\n", - __func__, (void *)data, fault_pfn, as_no); + dev_dbg(kbdev->dev, "Entering %s %pK, fault_pfn %lld, as_no %d", __func__, (void *)data, + fault_pfn, as_no); /* Grab the context that was already refcounted in kbase_mmu_interrupt() * Therefore, it cannot be scheduled out of this AS until we explicitly @@ -1010,8 +1064,7 @@ void kbase_mmu_page_fault_worker(struct work_struct *data) #ifdef CONFIG_MALI_ARBITER_SUPPORT /* check if we still have GPU */ if (unlikely(kbase_is_gpu_removed(kbdev))) { - dev_dbg(kbdev->dev, - "%s: GPU has been removed\n", __func__); + dev_dbg(kbdev->dev, "%s: GPU has been removed", __func__); goto fault_done; } #endif @@ -1206,8 +1259,7 @@ page_fault_retry: /* cap to max vsize */ new_pages = min(new_pages, region->nr_pages - current_backed_size); - dev_dbg(kctx->kbdev->dev, "Allocate %zu pages on page fault\n", - new_pages); + dev_dbg(kctx->kbdev->dev, "Allocate %zu pages on page fault", 
new_pages); if (new_pages == 0) { struct kbase_mmu_hw_op_param op_param; @@ -1284,11 +1336,10 @@ page_fault_retry: * so the no_flush version of insert_pages is used which allows * us to unlock the MMU as we see fit. */ - err = kbase_mmu_insert_pages_no_flush(kbdev, &kctx->mmu, - region->start_pfn + pfn_offset, - &kbase_get_gpu_phy_pages(region)[pfn_offset], - new_pages, region->flags, - region->gpu_alloc->group_id, &dirty_pgds); + err = kbase_mmu_insert_pages_no_flush( + kbdev, &kctx->mmu, region->start_pfn + pfn_offset, + &kbase_get_gpu_phy_pages(region)[pfn_offset], new_pages, region->flags, + region->gpu_alloc->group_id, &dirty_pgds, region, false); if (err) { kbase_free_phy_pages_helper(region->gpu_alloc, new_pages); @@ -1314,16 +1365,11 @@ page_fault_retry: if (region->threshold_pages && kbase_reg_current_backed_size(region) > region->threshold_pages) { - - dev_dbg(kctx->kbdev->dev, - "%zu pages exceeded IR threshold %zu\n", - new_pages + current_backed_size, - region->threshold_pages); + dev_dbg(kctx->kbdev->dev, "%zu pages exceeded IR threshold %zu", + new_pages + current_backed_size, region->threshold_pages); if (kbase_mmu_switch_to_ir(kctx, region) >= 0) { - dev_dbg(kctx->kbdev->dev, - "Get region %pK for IR\n", - (void *)region); + dev_dbg(kctx->kbdev->dev, "Get region %pK for IR", (void *)region); kbase_va_region_alloc_get(kctx, region); } } @@ -1441,7 +1487,7 @@ page_fault_retry: kbase_mmu_report_fault_and_kill(kctx, faulting_as, "Page allocation failure", fault); } else { - dev_dbg(kbdev->dev, "Try again after pool_grow\n"); + dev_dbg(kbdev->dev, "Try again after pool_grow"); goto page_fault_retry; } } @@ -1468,7 +1514,7 @@ fault_done: release_ctx(kbdev, kctx); atomic_dec(&kbdev->faults_pending); - dev_dbg(kbdev->dev, "Leaving page_fault_worker %pK\n", (void *)data); + dev_dbg(kbdev->dev, "Leaving page_fault_worker %pK", (void *)data); } static phys_addr_t kbase_mmu_alloc_pgd(struct kbase_device *kbdev, @@ -1532,11 +1578,10 @@ static int 
mmu_get_next_pgd(struct kbase_device *kbdev, struct kbase_mmu_table * u64 *dirty_pgds) { u64 *page; + u64 pgd_vpfn = vpfn; phys_addr_t target_pgd; struct page *p; - KBASE_DEBUG_ASSERT(*pgd); - lockdep_assert_held(&mmut->mmu_lock); /* @@ -1549,7 +1594,7 @@ static int mmu_get_next_pgd(struct kbase_device *kbdev, struct kbase_mmu_table * p = pfn_to_page(PFN_DOWN(*pgd)); page = kmap(p); if (page == NULL) { - dev_warn(kbdev->dev, "%s: kmap failure\n", __func__); + dev_warn(kbdev->dev, "%s: kmap failure", __func__); return -EINVAL; } @@ -1559,8 +1604,7 @@ static int mmu_get_next_pgd(struct kbase_device *kbdev, struct kbase_mmu_table * target_pgd = kbase_mmu_alloc_pgd(kbdev, mmut); if (target_pgd == KBASE_MMU_INVALID_PGD_ADDRESS) { - dev_dbg(kbdev->dev, "%s: kbase_mmu_alloc_pgd failure\n", - __func__); + dev_dbg(kbdev->dev, "%s: kbase_mmu_alloc_pgd failure", __func__); kunmap(p); return -ENOMEM; } @@ -1585,9 +1629,32 @@ static int mmu_get_next_pgd(struct kbase_device *kbdev, struct kbase_mmu_table * * GPU cache is still needed. For explanation, please refer * the comment in kbase_mmu_insert_pages_no_flush(). 
*/ - kbase_mmu_sync_pgd(kbdev, mmut->kctx, *pgd + (vpfn * sizeof(u64)), - kbase_dma_addr(p) + (vpfn * sizeof(u64)), sizeof(u64), - KBASE_MMU_OP_FLUSH_PT); + kbase_mmu_sync_pgd(kbdev, mmut->kctx, + *pgd + (vpfn * sizeof(u64)), + kbase_dma_addr(p) + (vpfn * sizeof(u64)), + sizeof(u64), KBASE_MMU_OP_FLUSH_PT); + + /* Update the new target_pgd page to its stable state */ + if (kbase_page_migration_enabled) { + struct kbase_page_metadata *page_md = + kbase_page_private(phys_to_page(target_pgd)); + + spin_lock(&page_md->migrate_lock); + + WARN_ON_ONCE(PAGE_STATUS_GET(page_md->status) != ALLOCATE_IN_PROGRESS || + IS_PAGE_ISOLATED(page_md->status)); + + if (mmut->kctx) { + page_md->status = PAGE_STATUS_SET(page_md->status, PT_MAPPED); + page_md->data.pt_mapped.mmut = mmut; + page_md->data.pt_mapped.pgd_vpfn_level = + PGD_VPFN_LEVEL_SET(pgd_vpfn, level); + } else { + page_md->status = PAGE_STATUS_SET(page_md->status, NOT_MOVABLE); + } + + spin_unlock(&page_md->migrate_lock); + } } else { target_pgd = kbdev->mmu_mode->pte_to_phy_addr( kbdev->mgm_dev->ops.mgm_pte_to_original_pte( @@ -1618,9 +1685,8 @@ static int mmu_get_pgd_at_level(struct kbase_device *kbdev, struct kbase_mmu_tab mmu_get_next_pgd(kbdev, mmut, &pgd, vpfn, l, newly_created_pgd, dirty_pgds); /* Handle failure condition */ if (err) { - dev_dbg(kbdev->dev, - "%s: mmu_get_next_pgd failure at level %d\n", - __func__, l); + dev_dbg(kbdev->dev, "%s: mmu_get_next_pgd failure at level %d", __func__, + l); return err; } } @@ -1640,7 +1706,8 @@ static int mmu_get_bottom_pgd(struct kbase_device *kbdev, struct kbase_mmu_table static void mmu_insert_pages_failure_recovery(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, u64 from_vpfn, u64 to_vpfn, u64 *dirty_pgds, - struct list_head *free_pgds_list) + struct list_head *free_pgds_list, + struct tagged_addr *phys, bool ignore_page_migration) { u64 vpfn = from_vpfn; struct kbase_mmu_mode const *mmu_mode; @@ -1693,8 +1760,7 @@ static void 
mmu_insert_pages_failure_recovery(struct kbase_device *kbdev, pcount = count; break; default: - dev_warn(kbdev->dev, "%sNo support for ATEs at level %d\n", - __func__, level); + dev_warn(kbdev->dev, "%sNo support for ATEs at level %d", __func__, level); goto next; } @@ -1713,7 +1779,7 @@ static void mmu_insert_pages_failure_recovery(struct kbase_device *kbdev, if (!num_of_valid_entries) { kunmap(p); - list_add(&p->lru, free_pgds_list); + kbase_mmu_add_to_free_pgds_list(kbdev, mmut, p, free_pgds_list); kbase_mmu_update_and_free_parent_pgds(kbdev, mmut, pgds, vpfn, level, KBASE_MMU_OP_NONE, dirty_pgds, @@ -1734,12 +1800,34 @@ static void mmu_insert_pages_failure_recovery(struct kbase_device *kbdev, next: vpfn += count; } + + /* If page migration is enabled: the only way to recover from failure + * is to mark all pages as not movable. It is not predictable what's + * going to happen to these pages at this stage. They might return + * movable once they are returned to a memory pool. + */ + if (kbase_page_migration_enabled && !ignore_page_migration && phys) { + const u64 num_pages = to_vpfn - from_vpfn + 1; + u64 i; + + for (i = 0; i < num_pages; i++) { + struct page *phys_page = as_page(phys[i]); + struct kbase_page_metadata *page_md = kbase_page_private(phys_page); + + if (page_md) { + spin_lock(&page_md->migrate_lock); + page_md->status = PAGE_STATUS_SET(page_md->status, (u8)NOT_MOVABLE); + spin_unlock(&page_md->migrate_lock); + } + } + } } static void mmu_flush_invalidate_insert_pages(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, const u64 vpfn, size_t nr, u64 dirty_pgds, - enum kbase_caller_mmu_sync_info mmu_sync_info) + enum kbase_caller_mmu_sync_info mmu_sync_info, + bool insert_pages_failed) { struct kbase_mmu_hw_op_param op_param; int as_nr = 0; @@ -1764,8 +1852,12 @@ static void mmu_flush_invalidate_insert_pages(struct kbase_device *kbdev, * * Operations that affect the whole GPU cache shall only be done if it's * impossible to update physical 
ranges. + * + * On GPUs where flushing by physical address range is supported, + * full cache flush is done when an error occurs during + * insert_pages() to keep the error handling simpler. */ - if (mmu_flush_cache_on_gpu_ctrl(kbdev)) + if (mmu_flush_cache_on_gpu_ctrl(kbdev) && !insert_pages_failed) mmu_invalidate(kbdev, mmut->kctx, as_nr, &op_param); else mmu_flush_invalidate(kbdev, mmut->kctx, as_nr, &op_param); @@ -1806,6 +1898,20 @@ int kbase_mmu_insert_single_page(struct kbase_context *kctx, u64 vpfn, if (nr == 0) return 0; + /* If page migration is enabled, pages involved in multiple GPU mappings + * are always treated as not movable. + */ + if (kbase_page_migration_enabled) { + struct page *phys_page = as_page(phys); + struct kbase_page_metadata *page_md = kbase_page_private(phys_page); + + if (page_md) { + spin_lock(&page_md->migrate_lock); + page_md->status = PAGE_STATUS_SET(page_md->status, (u8)NOT_MOVABLE); + spin_unlock(&page_md->migrate_lock); + } + } + mutex_lock(&kctx->mmu.mmu_lock); while (remain) { @@ -1842,15 +1948,15 @@ int kbase_mmu_insert_single_page(struct kbase_context *kctx, u64 vpfn, mutex_lock(&kctx->mmu.mmu_lock); } while (!err); if (err) { - dev_warn(kbdev->dev, "%s: mmu_get_bottom_pgd failure\n", - __func__); + dev_warn(kbdev->dev, "%s: mmu_get_bottom_pgd failure", __func__); if (recover_required) { /* Invalidate the pages we have partially * completed */ mmu_insert_pages_failure_recovery(kbdev, &kctx->mmu, start_vpfn, start_vpfn + recover_count, - &dirty_pgds, &free_pgds_list); + &dirty_pgds, &free_pgds_list, + NULL, true); } goto fail_unlock; } @@ -1858,14 +1964,15 @@ int kbase_mmu_insert_single_page(struct kbase_context *kctx, u64 vpfn, p = pfn_to_page(PFN_DOWN(pgd)); pgd_page = kmap(p); if (!pgd_page) { - dev_warn(kbdev->dev, "%s: kmap failure\n", __func__); + dev_warn(kbdev->dev, "%s: kmap failure", __func__); if (recover_required) { /* Invalidate the pages we have partially * completed */ mmu_insert_pages_failure_recovery(kbdev, 
&kctx->mmu, start_vpfn, start_vpfn + recover_count, - &dirty_pgds, &free_pgds_list); + &dirty_pgds, &free_pgds_list, + NULL, true); } err = -ENOMEM; goto fail_unlock; @@ -1917,7 +2024,7 @@ int kbase_mmu_insert_single_page(struct kbase_context *kctx, u64 vpfn, mutex_unlock(&kctx->mmu.mmu_lock); mmu_flush_invalidate_insert_pages(kbdev, &kctx->mmu, start_vpfn, nr, dirty_pgds, - mmu_sync_info); + mmu_sync_info, false); return 0; @@ -1925,12 +2032,91 @@ fail_unlock: mutex_unlock(&kctx->mmu.mmu_lock); mmu_flush_invalidate_insert_pages(kbdev, &kctx->mmu, start_vpfn, nr, dirty_pgds, - mmu_sync_info); + mmu_sync_info, true); kbase_mmu_free_pgds_list(kbdev, &kctx->mmu, &free_pgds_list); return err; } +static void kbase_mmu_progress_migration_on_insert(struct tagged_addr phys, + struct kbase_va_region *reg, + struct kbase_mmu_table *mmut, const u64 vpfn) +{ + struct page *phys_page = as_page(phys); + struct kbase_page_metadata *page_md = kbase_page_private(phys_page); + + spin_lock(&page_md->migrate_lock); + + /* If no GPU va region is given: the metadata provided are + * invalid. + * + * If the page is already allocated and mapped: this is + * an additional GPU mapping, probably to create a memory + * alias, which means it is no longer possible to migrate + * the page easily because tracking all the GPU mappings + * would be too costly. + * + * In any case: the page becomes not movable. It is kept + * alive, but attempts to migrate it will fail. The page + * will be freed if it is still not movable when it returns + * to a memory pool. Notice that the movable flag is not + * cleared because that would require taking the page lock. 
+ */ + if (!reg || PAGE_STATUS_GET(page_md->status) == (u8)ALLOCATED_MAPPED) { + page_md->status = PAGE_STATUS_SET(page_md->status, (u8)NOT_MOVABLE); + } else if (PAGE_STATUS_GET(page_md->status) == (u8)ALLOCATE_IN_PROGRESS) { + page_md->status = PAGE_STATUS_SET(page_md->status, (u8)ALLOCATED_MAPPED); + page_md->data.mapped.reg = reg; + page_md->data.mapped.mmut = mmut; + page_md->data.mapped.vpfn = vpfn; + } + + spin_unlock(&page_md->migrate_lock); +} + +static void kbase_mmu_progress_migration_on_teardown(struct kbase_device *kbdev, + struct tagged_addr *phys, size_t requested_nr) +{ + size_t i; + + for (i = 0; i < requested_nr; i++) { + struct page *phys_page = as_page(phys[i]); + struct kbase_page_metadata *page_md = kbase_page_private(phys_page); + + /* Skip the 4KB page that is part of a large page, as the large page is + * excluded from the migration process. + */ + if (is_huge(phys[i]) || is_partial(phys[i])) + continue; + + if (page_md) { + u8 status; + + spin_lock(&page_md->migrate_lock); + status = PAGE_STATUS_GET(page_md->status); + + if (status == ALLOCATED_MAPPED) { + if (IS_PAGE_ISOLATED(page_md->status)) { + page_md->status = PAGE_STATUS_SET( + page_md->status, (u8)FREE_ISOLATED_IN_PROGRESS); + page_md->data.free_isolated.kbdev = kbdev; + /* At this point, we still have a reference + * to the page via its page migration metadata, + * and any page with the FREE_ISOLATED_IN_PROGRESS + * status will subsequently be freed in either + * kbase_page_migrate() or kbase_page_putback() + */ + phys[i] = as_tagged(0); + } else + page_md->status = PAGE_STATUS_SET(page_md->status, + (u8)FREE_IN_PROGRESS); + } + + spin_unlock(&page_md->migrate_lock); + } + } +} + u64 kbase_mmu_create_ate(struct kbase_device *const kbdev, struct tagged_addr const phy, unsigned long const flags, int const level, int const group_id) @@ -1944,7 +2130,8 @@ u64 kbase_mmu_create_ate(struct kbase_device *const kbdev, int kbase_mmu_insert_pages_no_flush(struct kbase_device *kbdev, struct 
kbase_mmu_table *mmut, const u64 start_vpfn, struct tagged_addr *phys, size_t nr, - unsigned long flags, int const group_id, u64 *dirty_pgds) + unsigned long flags, int const group_id, u64 *dirty_pgds, + struct kbase_va_region *reg, bool ignore_page_migration) { phys_addr_t pgd; u64 *pgd_page; @@ -2006,14 +2193,15 @@ int kbase_mmu_insert_pages_no_flush(struct kbase_device *kbdev, struct kbase_mmu } while (!err); if (err) { - dev_warn(kbdev->dev, "%s: mmu_get_pgd_at_level failure\n", __func__); + dev_warn(kbdev->dev, "%s: mmu_get_pgd_at_level failure", __func__); if (insert_vpfn != start_vpfn) { /* Invalidate the pages we have partially * completed */ mmu_insert_pages_failure_recovery(kbdev, mmut, start_vpfn, insert_vpfn, dirty_pgds, - &free_pgds_list); + &free_pgds_list, phys, + ignore_page_migration); } goto fail_unlock; } @@ -2021,15 +2209,15 @@ int kbase_mmu_insert_pages_no_flush(struct kbase_device *kbdev, struct kbase_mmu p = pfn_to_page(PFN_DOWN(pgd)); pgd_page = kmap(p); if (!pgd_page) { - dev_warn(kbdev->dev, "%s: kmap failure\n", - __func__); + dev_warn(kbdev->dev, "%s: kmap failure", __func__); if (insert_vpfn != start_vpfn) { /* Invalidate the pages we have partially * completed */ mmu_insert_pages_failure_recovery(kbdev, mmut, start_vpfn, insert_vpfn, dirty_pgds, - &free_pgds_list); + &free_pgds_list, phys, + ignore_page_migration); } err = -ENOMEM; goto fail_unlock; @@ -2060,6 +2248,14 @@ int kbase_mmu_insert_pages_no_flush(struct kbase_device *kbdev, struct kbase_mmu *target = kbase_mmu_create_ate(kbdev, phys[i], flags, cur_level, group_id); + + /* If page migration is enabled, this is the right time + * to update the status of the page. 
+ */ + if (kbase_page_migration_enabled && !ignore_page_migration && + !is_huge(phys[i]) && !is_partial(phys[i])) + kbase_mmu_progress_migration_on_insert(phys[i], reg, mmut, + insert_vpfn + i); } num_of_valid_entries += count; } @@ -2104,8 +2300,8 @@ int kbase_mmu_insert_pages_no_flush(struct kbase_device *kbdev, struct kbase_mmu fail_unlock: mutex_unlock(&mmut->mmu_lock); - mmu_flush_invalidate_insert_pages(kbdev, mmut, start_vpfn, nr, *dirty_pgds, - CALLER_MMU_ASYNC); + mmu_flush_invalidate_insert_pages(kbdev, mmut, start_vpfn, nr, + dirty_pgds ? *dirty_pgds : 0xF, CALLER_MMU_ASYNC, true); kbase_mmu_free_pgds_list(kbdev, mmut, &free_pgds_list); return err; @@ -2115,11 +2311,10 @@ fail_unlock: * Map 'nr' pages pointed to by 'phys' at GPU PFN 'vpfn' for GPU address space * number 'as_nr'. */ -int kbase_mmu_insert_pages(struct kbase_device *kbdev, - struct kbase_mmu_table *mmut, u64 vpfn, - struct tagged_addr *phys, size_t nr, - unsigned long flags, int as_nr, int const group_id, - enum kbase_caller_mmu_sync_info mmu_sync_info) +int kbase_mmu_insert_pages(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, u64 vpfn, + struct tagged_addr *phys, size_t nr, unsigned long flags, int as_nr, + int const group_id, enum kbase_caller_mmu_sync_info mmu_sync_info, + struct kbase_va_region *reg, bool ignore_page_migration) { int err; u64 dirty_pgds = 0; @@ -2130,11 +2325,11 @@ int kbase_mmu_insert_pages(struct kbase_device *kbdev, return 0; err = kbase_mmu_insert_pages_no_flush(kbdev, mmut, vpfn, phys, nr, flags, group_id, - &dirty_pgds); + &dirty_pgds, reg, ignore_page_migration); if (err) return err; - mmu_flush_invalidate_insert_pages(kbdev, mmut, vpfn, nr, dirty_pgds, mmu_sync_info); + mmu_flush_invalidate_insert_pages(kbdev, mmut, vpfn, nr, dirty_pgds, mmu_sync_info, false); return 0; } @@ -2285,7 +2480,7 @@ static void kbase_mmu_update_and_free_parent_pgds(struct kbase_device *kbdev, current_pgd + (index * sizeof(u64)), sizeof(u64), flush_op); - list_add(&p->lru, 
free_pgds_list); + kbase_mmu_add_to_free_pgds_list(kbdev, mmut, p, free_pgds_list); } else { current_valid_entries--; @@ -2361,11 +2556,12 @@ static void mmu_flush_invalidate_teardown_pages(struct kbase_device *kbdev, * @mmut: Pointer to GPU MMU page table. * @vpfn: Start page frame number of the GPU virtual pages to unmap. * @phys: Array of physical pages currently mapped to the virtual - * pages to unmap, or NULL. This is only used for GPU cache - * maintenance. + * pages to unmap, or NULL. This is used for GPU cache maintenance + * and page migration support. * @nr: Number of pages to unmap. * @as_nr: Address space number, for GPU cache maintenance operations * that happen outside a specific kbase context. + * @ignore_page_migration: Whether page migration metadata should be ignored. * * We actually discard the ATE and free the page table pages if no valid entries * exist in PGD. @@ -2384,10 +2580,11 @@ static void mmu_flush_invalidate_teardown_pages(struct kbase_device *kbdev, * Return: 0 on success, otherwise an error code. 
*/ int kbase_mmu_teardown_pages(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, u64 vpfn, - struct tagged_addr *phys, size_t nr, int as_nr) + struct tagged_addr *phys, size_t nr, int as_nr, + bool ignore_page_migration) { + const size_t requested_nr = nr; u64 start_vpfn = vpfn; - size_t requested_nr = nr; enum kbase_mmu_op_type flush_op = KBASE_MMU_OP_NONE; struct kbase_mmu_mode const *mmu_mode; struct kbase_mmu_hw_op_param op_param; @@ -2478,9 +2675,8 @@ int kbase_mmu_teardown_pages(struct kbase_device *kbdev, struct kbase_mmu_table switch (level) { case MIDGARD_MMU_LEVEL(0): case MIDGARD_MMU_LEVEL(1): - dev_warn(kbdev->dev, - "%s: No support for ATEs at level %d\n", - __func__, level); + dev_warn(kbdev->dev, "%s: No support for ATEs at level %d", __func__, + level); kunmap(p); goto out; case MIDGARD_MMU_LEVEL(2): @@ -2488,9 +2684,10 @@ int kbase_mmu_teardown_pages(struct kbase_device *kbdev, struct kbase_mmu_table if (count >= 512) { pcount = 1; } else { - dev_warn(kbdev->dev, - "%s: limiting teardown as it tries to do a partial 2MB teardown, need 512, but have %d to tear down\n", - __func__, count); + dev_warn( + kbdev->dev, + "%s: limiting teardown as it tries to do a partial 2MB teardown, need 512, but have %d to tear down", + __func__, count); pcount = 0; } break; @@ -2499,9 +2696,7 @@ int kbase_mmu_teardown_pages(struct kbase_device *kbdev, struct kbase_mmu_table pcount = count; break; default: - dev_err(kbdev->dev, - "%s: found non-mapped memory, early out\n", - __func__); + dev_err(kbdev->dev, "%s: found non-mapped memory, early out", __func__); vpfn += count; nr -= count; continue; @@ -2530,7 +2725,7 @@ int kbase_mmu_teardown_pages(struct kbase_device *kbdev, struct kbase_mmu_table pgd + (index * sizeof(u64)), pcount * sizeof(u64), flush_op); - list_add(&p->lru, &free_pgds_list); + kbase_mmu_add_to_free_pgds_list(kbdev, mmut, p, &free_pgds_list); kbase_mmu_update_and_free_parent_pgds(kbdev, mmut, pgds, vpfn, level, flush_op, &dirty_pgds, @@ 
-2553,7 +2748,6 @@ next: } err = 0; out: - mutex_unlock(&mmut->mmu_lock); /* Set up MMU operation parameters. See above about MMU cache flush strategy. */ op_param = (struct kbase_mmu_hw_op_param){ .vpfn = start_vpfn, @@ -2566,6 +2760,16 @@ out: }; mmu_flush_invalidate_teardown_pages(kbdev, mmut->kctx, as_nr, phys, &op_param); + /* If page migration is enabled: the status of all physical pages involved + * shall be updated, unless they are not movable. Their status shall be + * updated before releasing the lock to protect against concurrent + * requests to migrate the pages, if they have been isolated. + */ + if (kbase_page_migration_enabled && phys && !ignore_page_migration) + kbase_mmu_progress_migration_on_teardown(kbdev, phys, requested_nr); + + mutex_unlock(&mmut->mmu_lock); + kbase_mmu_free_pgds_list(kbdev, mmut, &free_pgds_list); return err; @@ -2574,9 +2778,11 @@ out: KBASE_EXPORT_TEST_API(kbase_mmu_teardown_pages); /** - * kbase_mmu_update_pages_no_flush() - Update attributes data in GPU page table entries + * kbase_mmu_update_pages_no_flush() - Update phy pages and attributes data in GPU + * page table entries * - * @kctx: Kbase context + * @kbdev: Pointer to kbase device. + * @mmut: The involved MMU table * @vpfn: Virtual PFN (Page Frame Number) of the first page to update * @phys: Pointer to the array of tagged physical addresses of the physical * pages that are pointed to by the page table entries (that need to @@ -2589,26 +2795,22 @@ KBASE_EXPORT_TEST_API(kbase_mmu_teardown_pages); * @dirty_pgds: Flags to track every level where a PGD has been updated. * * This will update page table entries that already exist on the GPU based on - * the new flags that are passed (the physical pages pointed to by the page - * table entries remain unchanged). It is used as a response to the changes of - * the memory attributes. + * new flags and replace any existing phy pages that are passed (the PGD pages + * remain unchanged). 
It is used as a response to the changes of phys as well + * as the memory attributes. * * The caller is responsible for validating the memory attributes. * * Return: 0 if the attributes data in page table entries were updated * successfully, otherwise an error code. */ -static int kbase_mmu_update_pages_no_flush(struct kbase_context *kctx, u64 vpfn, - struct tagged_addr *phys, size_t nr, unsigned long flags, - int const group_id, u64 *dirty_pgds) +static int kbase_mmu_update_pages_no_flush(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, + u64 vpfn, struct tagged_addr *phys, size_t nr, + unsigned long flags, int const group_id, u64 *dirty_pgds) { phys_addr_t pgd; u64 *pgd_page; int err; - struct kbase_device *kbdev; - - if (WARN_ON(kctx == NULL)) - return -EINVAL; KBASE_DEBUG_ASSERT(vpfn <= (U64_MAX / PAGE_SIZE)); @@ -2616,9 +2818,7 @@ static int kbase_mmu_update_pages_no_flush(struct kbase_context *kctx, u64 vpfn, if (nr == 0) return 0; - mutex_lock(&kctx->mmu.mmu_lock); - - kbdev = kctx->kbdev; + mutex_lock(&mmut->mmu_lock); while (nr) { unsigned int i; @@ -2634,8 +2834,7 @@ static int kbase_mmu_update_pages_no_flush(struct kbase_context *kctx, u64 vpfn, if (is_huge(*phys) && (index == index_in_large_page(*phys))) cur_level = MIDGARD_MMU_LEVEL(2); - err = mmu_get_pgd_at_level(kbdev, &kctx->mmu, vpfn, cur_level, &pgd, NULL, - dirty_pgds); + err = mmu_get_pgd_at_level(kbdev, mmut, vpfn, cur_level, &pgd, NULL, dirty_pgds); if (WARN_ON(err)) goto fail_unlock; @@ -2662,7 +2861,7 @@ static int kbase_mmu_update_pages_no_flush(struct kbase_context *kctx, u64 vpfn, pgd_page[level_index] = kbase_mmu_create_ate(kbdev, *target_phys, flags, MIDGARD_MMU_LEVEL(2), group_id); - kbase_mmu_sync_pgd(kbdev, kctx, pgd + (level_index * sizeof(u64)), + kbase_mmu_sync_pgd(kbdev, mmut->kctx, pgd + (level_index * sizeof(u64)), kbase_dma_addr(p) + (level_index * sizeof(u64)), sizeof(u64), KBASE_MMU_OP_NONE); } else { @@ -2680,7 +2879,7 @@ static int 
kbase_mmu_update_pages_no_flush(struct kbase_context *kctx, u64 vpfn, /* MMU cache flush strategy is NONE because GPU cache maintenance * will be done by the caller. */ - kbase_mmu_sync_pgd(kbdev, kctx, pgd + (index * sizeof(u64)), + kbase_mmu_sync_pgd(kbdev, mmut->kctx, pgd + (index * sizeof(u64)), kbase_dma_addr(p) + (index * sizeof(u64)), count * sizeof(u64), KBASE_MMU_OP_NONE); } @@ -2698,60 +2897,446 @@ static int kbase_mmu_update_pages_no_flush(struct kbase_context *kctx, u64 vpfn, kunmap(p); } - mutex_unlock(&kctx->mmu.mmu_lock); + mutex_unlock(&mmut->mmu_lock); return 0; fail_unlock: - mutex_unlock(&kctx->mmu.mmu_lock); + mutex_unlock(&mmut->mmu_lock); return err; } -int kbase_mmu_update_pages(struct kbase_context *kctx, u64 vpfn, - struct tagged_addr *phys, size_t nr, - unsigned long flags, int const group_id) +static int kbase_mmu_update_pages_common(struct kbase_device *kbdev, struct kbase_context *kctx, + u64 vpfn, struct tagged_addr *phys, size_t nr, + unsigned long flags, int const group_id) { int err; struct kbase_mmu_hw_op_param op_param; u64 dirty_pgds = 0; - + struct kbase_mmu_table *mmut; /* Calls to this function are inherently asynchronous, with respect to * MMU operations. */ const enum kbase_caller_mmu_sync_info mmu_sync_info = CALLER_MMU_ASYNC; + int as_nr; - err = kbase_mmu_update_pages_no_flush(kctx, vpfn, phys, nr, flags, group_id, &dirty_pgds); +#if !MALI_USE_CSF + if (unlikely(kctx == NULL)) + return -EINVAL; + + as_nr = kctx->as_nr; + mmut = &kctx->mmu; +#else + if (kctx) { + mmut = &kctx->mmu; + as_nr = kctx->as_nr; + } else { + mmut = &kbdev->csf.mcu_mmu; + as_nr = MCU_AS_NR; + } +#endif + + err = kbase_mmu_update_pages_no_flush(kbdev, mmut, vpfn, phys, nr, flags, group_id, + &dirty_pgds); op_param = (const struct kbase_mmu_hw_op_param){ .vpfn = vpfn, .nr = nr, .op = KBASE_MMU_OP_FLUSH_MEM, - .kctx_id = kctx->id, + .kctx_id = kctx ? 
kctx->id : 0xFFFFFFFF, .mmu_sync_info = mmu_sync_info, .flush_skip_levels = pgd_level_to_skip_flush(dirty_pgds), }; - if (mmu_flush_cache_on_gpu_ctrl(kctx->kbdev)) - mmu_flush_invalidate_on_gpu_ctrl(kctx->kbdev, kctx, kctx->as_nr, &op_param); + if (mmu_flush_cache_on_gpu_ctrl(kbdev)) + mmu_flush_invalidate_on_gpu_ctrl(kbdev, kctx, as_nr, &op_param); else - mmu_flush_invalidate(kctx->kbdev, kctx, kctx->as_nr, &op_param); + mmu_flush_invalidate(kbdev, kctx, as_nr, &op_param); + return err; } -static void mmu_teardown_level(struct kbase_device *kbdev, - struct kbase_mmu_table *mmut, phys_addr_t pgd, - int level) +int kbase_mmu_update_pages(struct kbase_context *kctx, u64 vpfn, struct tagged_addr *phys, + size_t nr, unsigned long flags, int const group_id) +{ + if (unlikely(kctx == NULL)) + return -EINVAL; + + return kbase_mmu_update_pages_common(kctx->kbdev, kctx, vpfn, phys, nr, flags, group_id); +} + +#if MALI_USE_CSF +int kbase_mmu_update_csf_mcu_pages(struct kbase_device *kbdev, u64 vpfn, struct tagged_addr *phys, + size_t nr, unsigned long flags, int const group_id) +{ + return kbase_mmu_update_pages_common(kbdev, NULL, vpfn, phys, nr, flags, group_id); +} +#endif /* MALI_USE_CSF */ + +static void mmu_page_migration_transaction_begin(struct kbase_device *kbdev) +{ + lockdep_assert_held(&kbdev->hwaccess_lock); + + WARN_ON_ONCE(kbdev->mmu_page_migrate_in_progress); + kbdev->mmu_page_migrate_in_progress = true; +} + +static void mmu_page_migration_transaction_end(struct kbase_device *kbdev) +{ + lockdep_assert_held(&kbdev->hwaccess_lock); + WARN_ON_ONCE(!kbdev->mmu_page_migrate_in_progress); + kbdev->mmu_page_migrate_in_progress = false; + /* Invoke the PM state machine, as the MMU page migration session + * may have deferred a transition in L2 state machine. 
+ */ + kbase_pm_update_state(kbdev); +} + +int kbase_mmu_migrate_page(struct tagged_addr old_phys, struct tagged_addr new_phys, + dma_addr_t old_dma_addr, dma_addr_t new_dma_addr, int level) +{ + struct kbase_page_metadata *page_md = kbase_page_private(as_page(old_phys)); + struct kbase_mmu_hw_op_param op_param; + struct kbase_mmu_table *mmut = (level == MIDGARD_MMU_BOTTOMLEVEL) ? + page_md->data.mapped.mmut : + page_md->data.pt_mapped.mmut; + struct kbase_device *kbdev; + phys_addr_t pgd; + u64 *old_page, *new_page, *pgd_page, *target, vpfn; + int index, check_state, ret = 0; + unsigned long hwaccess_flags = 0; + unsigned int num_of_valid_entries; + u8 vmap_count = 0; + + /* Due to the hard binding of mmu_command_instr with kctx_id via kbase_mmu_hw_op_param, + * here we skip the no kctx case, which is only used with MCU's mmut. + */ + if (!mmut->kctx) + return -EINVAL; + + if (level > MIDGARD_MMU_BOTTOMLEVEL) + return -EINVAL; + else if (level == MIDGARD_MMU_BOTTOMLEVEL) + vpfn = page_md->data.mapped.vpfn; + else + vpfn = PGD_VPFN_LEVEL_GET_VPFN(page_md->data.pt_mapped.pgd_vpfn_level); + + kbdev = mmut->kctx->kbdev; + index = (vpfn >> ((3 - level) * 9)) & 0x1FF; + + /* Create all mappings before copying content. + * This is done as early as possible because it is the only operation that may + * fail. It is possible to do this before taking any locks because the + * pages to migrate are not going to change and even the parent PGD is not + * going to be affected by any other concurrent operation, since the page + * has been isolated before migration and therefore it cannot disappear in + * the middle of this function. 
+ */ + old_page = kmap(as_page(old_phys)); + if (!old_page) { + dev_warn(kbdev->dev, "%s: kmap failure for old page.", __func__); + ret = -EINVAL; + goto old_page_map_error; + } + + new_page = kmap(as_page(new_phys)); + if (!new_page) { + dev_warn(kbdev->dev, "%s: kmap failure for new page.", __func__); + ret = -EINVAL; + goto new_page_map_error; + } + + /* GPU cache maintenance affects both memory content and page table, + * but at two different stages. A single virtual memory page is affected + * by the migration. + * + * Notice that the MMU maintenance is done in the following steps: + * + * 1) The MMU region is locked without performing any other operation. + * This lock must cover the entire migration process, in order to + * prevent any GPU access to the virtual page whose physical page + * is being migrated. + * 2) Immediately after locking: the MMU region content is flushed via + * GPU control while the lock is taken and without unlocking. + * The region must stay locked for the duration of the whole page + * migration procedure. + * This is necessary to make sure that pending writes to the old page + * are finalized before copying content to the new page. + * 3) Before unlocking: changes to the page table are flushed. + * Finer-grained GPU control operations are used if possible, otherwise + * the whole GPU cache shall be flushed again. + * This is necessary to make sure that the GPU accesses the new page + * after migration. + * 4) The MMU region is unlocked. + */ +#define PGD_VPFN_MASK(level) (~((((u64)1) << ((3 - level) * 9)) - 1)) + op_param.mmu_sync_info = CALLER_MMU_ASYNC; + op_param.kctx_id = mmut->kctx->id; + op_param.vpfn = vpfn & PGD_VPFN_MASK(level); + op_param.nr = 1 << ((3 - level) * 9); + op_param.op = KBASE_MMU_OP_FLUSH_PT; + /* When level is not MIDGARD_MMU_BOTTOMLEVEL, it is assumed PGD page migration */ + op_param.flush_skip_levels = (level == MIDGARD_MMU_BOTTOMLEVEL) ? 
+ pgd_level_to_skip_flush(1ULL << level) : + pgd_level_to_skip_flush(3ULL << level); + + mutex_lock(&mmut->mmu_lock); + + /* The state was evaluated before entering this function, but it could + * have changed before the mmu_lock was taken. However, the state + * transitions which are possible at this point are only two, and in both + * cases it is a stable state progressing to a "free in progress" state. + * + * After taking the mmu_lock the state can no longer change: read it again + * and make sure that it hasn't changed before continuing. + */ + spin_lock(&page_md->migrate_lock); + check_state = PAGE_STATUS_GET(page_md->status); + if (level == MIDGARD_MMU_BOTTOMLEVEL) + vmap_count = page_md->vmap_count; + spin_unlock(&page_md->migrate_lock); + + if (level == MIDGARD_MMU_BOTTOMLEVEL) { + if (check_state != ALLOCATED_MAPPED) { + dev_dbg(kbdev->dev, + "%s: state changed to %d (was %d), abort page migration", __func__, + check_state, ALLOCATED_MAPPED); + ret = -EAGAIN; + goto page_state_change_out; + } else if (vmap_count > 0) { + dev_dbg(kbdev->dev, "%s: page was multi-mapped, abort page migration", + __func__); + ret = -EAGAIN; + goto page_state_change_out; + } + } else { + if (check_state != PT_MAPPED) { + dev_dbg(kbdev->dev, + "%s: state changed to %d (was %d), abort PGD page migration", + __func__, check_state, PT_MAPPED); + WARN_ON_ONCE(check_state != FREE_PT_ISOLATED_IN_PROGRESS); + ret = -EAGAIN; + goto page_state_change_out; + } + } + + ret = mmu_get_pgd_at_level(kbdev, mmut, vpfn, level, &pgd, NULL, NULL); + if (ret) { + dev_warn(kbdev->dev, "%s: failed to find PGD for old page.", __func__); + goto get_pgd_at_level_error; + } + + pgd_page = kmap(phys_to_page(pgd)); + if (!pgd_page) { + dev_warn(kbdev->dev, "%s: kmap failure for PGD page.", __func__); + ret = -EINVAL; + goto pgd_page_map_error; + } + + mutex_lock(&kbdev->pm.lock); + mutex_lock(&kbdev->mmu_hw_mutex); + + /* Lock MMU region and flush GPU cache by using GPU control, + * in order to keep MMU 
region locked. + */ + spin_lock_irqsave(&kbdev->hwaccess_lock, hwaccess_flags); + if (unlikely(!kbase_pm_l2_allow_mmu_page_migration(kbdev))) { + /* Defer the migration as L2 is in a transitional phase */ + spin_unlock_irqrestore(&kbdev->hwaccess_lock, hwaccess_flags); + mutex_unlock(&kbdev->mmu_hw_mutex); + mutex_unlock(&kbdev->pm.lock); + dev_dbg(kbdev->dev, "%s: L2 in transtion, abort PGD page migration", __func__); + ret = -EAGAIN; + goto l2_state_defer_out; + } + /* Prevent transitional phases in L2 by starting the transaction */ + mmu_page_migration_transaction_begin(kbdev); + if (kbdev->pm.backend.gpu_powered && mmut->kctx->as_nr >= 0) { + int as_nr = mmut->kctx->as_nr; + struct kbase_as *as = &kbdev->as[as_nr]; + + ret = kbase_mmu_hw_do_lock(kbdev, as, &op_param); + if (!ret) { + ret = kbase_gpu_cache_flush_and_busy_wait( + kbdev, GPU_COMMAND_CACHE_CLN_INV_L2_LSC); + } + if (ret) + mmu_page_migration_transaction_end(kbdev); + } + spin_unlock_irqrestore(&kbdev->hwaccess_lock, hwaccess_flags); + + if (ret < 0) { + mutex_unlock(&kbdev->mmu_hw_mutex); + mutex_unlock(&kbdev->pm.lock); + dev_err(kbdev->dev, + "%s: failed to lock MMU region or flush GPU cache. Issuing GPU soft-reset to recover.", + __func__); + goto gpu_reset; + } + + /* Copy memory content. + * + * It is necessary to claim the ownership of the DMA buffer for the old + * page before performing the copy, to make sure of reading a consistent + * version of its content, before copying. After the copy, ownership of + * the DMA buffer for the new page is given to the GPU in order to make + * the content visible to potential GPU access that may happen as soon as + * this function releases the lock on the MMU region. + */ + dma_sync_single_for_cpu(kbdev->dev, old_dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL); + memcpy(new_page, old_page, PAGE_SIZE); + dma_sync_single_for_device(kbdev->dev, new_dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL); + + /* Remap GPU virtual page. 
+ * + * This code rests on the assumption that page migration is only enabled + * for 4 kB pages, that necessarily live in the bottom level of the MMU + * page table. For this reason, the PGD level tells us unequivocally + * whether the page being migrated is a "content page" or another PGD + * of the page table: + * + * - Bottom level implies ATE (Address Translation Entry) + * - Any other level implies PTE (Page Table Entry) + * + * The current implementation doesn't handle the case of a level 0 PGD, + * that is: the root PGD of the page table. + */ + target = &pgd_page[index]; + + /* Certain entries of a page table page encode the count of valid entries + * present in that page. So need to save & restore the count information + * when updating the PTE/ATE to point to the new page. + */ + num_of_valid_entries = kbdev->mmu_mode->get_num_valid_entries(pgd_page); + + if (level == MIDGARD_MMU_BOTTOMLEVEL) { + WARN_ON_ONCE((*target & 1UL) == 0); + *target = + kbase_mmu_create_ate(kbdev, new_phys, page_md->data.mapped.reg->flags, + level, page_md->data.mapped.reg->gpu_alloc->group_id); + } else { + u64 managed_pte; + +#ifdef CONFIG_MALI_BIFROST_DEBUG + /* The PTE should be pointing to the page being migrated */ + WARN_ON_ONCE(as_phys_addr_t(old_phys) != kbdev->mmu_mode->pte_to_phy_addr( + kbdev->mgm_dev->ops.mgm_pte_to_original_pte( + kbdev->mgm_dev, MGM_DEFAULT_PTE_GROUP, level, pgd_page[index]))); +#endif + kbdev->mmu_mode->entry_set_pte(&managed_pte, as_phys_addr_t(new_phys)); + *target = kbdev->mgm_dev->ops.mgm_update_gpu_pte( + kbdev->mgm_dev, MGM_DEFAULT_PTE_GROUP, level, managed_pte); + } + + kbdev->mmu_mode->set_num_valid_entries(pgd_page, num_of_valid_entries); + + /* This function always updates a single entry inside an existing PGD, + * therefore cache maintenance is necessary and affects a single entry. 
+ */ + kbase_mmu_sync_pgd(kbdev, mmut->kctx, pgd + (index * sizeof(u64)), + kbase_dma_addr(phys_to_page(pgd)) + (index * sizeof(u64)), sizeof(u64), + KBASE_MMU_OP_FLUSH_PT); + + /* Unlock MMU region. + * + * Notice that GPUs which don't issue flush commands via GPU control + * still need an additional GPU cache flush here, this time only + * for the page table, because the function call above to sync PGDs + * won't have any effect on them. + */ + spin_lock_irqsave(&kbdev->hwaccess_lock, hwaccess_flags); + if (kbdev->pm.backend.gpu_powered && mmut->kctx->as_nr >= 0) { + int as_nr = mmut->kctx->as_nr; + struct kbase_as *as = &kbdev->as[as_nr]; + + if (mmu_flush_cache_on_gpu_ctrl(kbdev)) { + ret = kbase_mmu_hw_do_unlock(kbdev, as, &op_param); + } else { + ret = kbase_gpu_cache_flush_and_busy_wait(kbdev, + GPU_COMMAND_CACHE_CLN_INV_L2); + if (!ret) + ret = kbase_mmu_hw_do_unlock_no_addr(kbdev, as, &op_param); + } + } + spin_unlock_irqrestore(&kbdev->hwaccess_lock, hwaccess_flags); + /* Releasing locks before checking the migration transaction error state */ + mutex_unlock(&kbdev->mmu_hw_mutex); + mutex_unlock(&kbdev->pm.lock); + + spin_lock_irqsave(&kbdev->hwaccess_lock, hwaccess_flags); + /* Release the transition prevention in L2 by ending the transaction */ + mmu_page_migration_transaction_end(kbdev); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, hwaccess_flags); + + /* Checking the final migration transaction error state */ + if (ret < 0) { + dev_err(kbdev->dev, "%s: failed to unlock MMU region.", __func__); + goto gpu_reset; + } + + /* Undertaking metadata transfer, while we are holding the mmu_lock */ + spin_lock(&page_md->migrate_lock); + if (level == MIDGARD_MMU_BOTTOMLEVEL) { + size_t page_array_index = + page_md->data.mapped.vpfn - page_md->data.mapped.reg->start_pfn; + + WARN_ON(PAGE_STATUS_GET(page_md->status) != ALLOCATED_MAPPED); + + /* Replace page in array of pages of the physical allocation. 
*/ + page_md->data.mapped.reg->gpu_alloc->pages[page_array_index] = new_phys; + } + /* Update the new page dma_addr with the transferred metadata from the old_page */ + page_md->dma_addr = new_dma_addr; + page_md->status = PAGE_ISOLATE_SET(page_md->status, 0); + spin_unlock(&page_md->migrate_lock); + set_page_private(as_page(new_phys), (unsigned long)page_md); + /* Old page metadata pointer cleared as it is now owned by the new page */ + set_page_private(as_page(old_phys), 0); + +l2_state_defer_out: + kunmap(phys_to_page(pgd)); +pgd_page_map_error: +get_pgd_at_level_error: +page_state_change_out: + mutex_unlock(&mmut->mmu_lock); + + kunmap(as_page(new_phys)); +new_page_map_error: + kunmap(as_page(old_phys)); +old_page_map_error: + return ret; + +gpu_reset: + /* Unlock the MMU table before resetting the GPU and undo + * mappings. + */ + mutex_unlock(&mmut->mmu_lock); + kunmap(phys_to_page(pgd)); + kunmap(as_page(new_phys)); + kunmap(as_page(old_phys)); + + /* Reset the GPU because of an unrecoverable error in locking or flushing. */ + if (kbase_prepare_to_reset_gpu(kbdev, RESET_FLAGS_HWC_UNRECOVERABLE_ERROR)) + kbase_reset_gpu(kbdev); + + return ret; +} + +static void mmu_teardown_level(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, + phys_addr_t pgd, unsigned int level) { u64 *pgd_page; int i; struct memory_group_manager_device *mgm_dev = kbdev->mgm_dev; struct kbase_mmu_mode const *mmu_mode = kbdev->mmu_mode; u64 *pgd_page_buffer = NULL; + bool page_is_isolated = false; + struct page *p = phys_to_page(pgd); lockdep_assert_held(&mmut->mmu_lock); - pgd_page = kmap_atomic(pfn_to_page(PFN_DOWN(pgd))); + pgd_page = kmap_atomic(p); /* kmap_atomic should NEVER fail. 
*/ - if (WARN_ON(pgd_page == NULL)) + if (WARN_ON_ONCE(pgd_page == NULL)) return; if (level < MIDGARD_MMU_BOTTOMLEVEL) { /* Copy the page to our preallocated buffer so that we can minimize @@ -2761,6 +3346,12 @@ static void mmu_teardown_level(struct kbase_device *kbdev, memcpy(pgd_page_buffer, pgd_page, PAGE_SIZE); } + /* When page migration is enabled, kbase_region_tracker_term() would ensure + * there are no pages left mapped on the GPU for a context. Hence the count + * of valid entries is expected to be zero here. + */ + if (kbase_page_migration_enabled && mmut->kctx) + WARN_ON_ONCE(kbdev->mmu_mode->get_num_valid_entries(pgd_page)); /* Invalidate page after copying */ mmu_mode->entries_invalidate(pgd_page, KBASE_MMU_PAGE_ENTRIES); kunmap_atomic(pgd_page); @@ -2779,7 +3370,12 @@ static void mmu_teardown_level(struct kbase_device *kbdev, } } - kbase_mmu_free_pgd(kbdev, mmut, pgd); + /* Top level PGD page is excluded from migration process. */ + if (level != MIDGARD_MMU_TOPLEVEL) + page_is_isolated = kbase_mmu_handle_isolated_pgd_page(kbdev, mmut, p); + + if (likely(!page_is_isolated)) + kbase_mmu_free_pgd(kbdev, mmut, pgd); } int kbase_mmu_init(struct kbase_device *const kbdev, @@ -2836,6 +3432,10 @@ void kbase_mmu_term(struct kbase_device *kbdev, struct kbase_mmu_table *mmut) { int level; + WARN((mmut->kctx) && (mmut->kctx->as_nr != KBASEP_AS_NR_INVALID), + "kctx-%d_%d must first be scheduled out to flush GPU caches+tlbs before tearing down MMU tables", + mmut->kctx->tgid, mmut->kctx->id); + if (mmut->pgd != KBASE_MMU_INVALID_PGD_ADDRESS) { mutex_lock(&mmut->mmu_lock); mmu_teardown_level(kbdev, mmut, mmut->pgd, MIDGARD_MMU_TOPLEVEL); @@ -2855,11 +3455,26 @@ void kbase_mmu_term(struct kbase_device *kbdev, struct kbase_mmu_table *mmut) mutex_destroy(&mmut->mmu_lock); } -void kbase_mmu_as_term(struct kbase_device *kbdev, int i) +void kbase_mmu_as_term(struct kbase_device *kbdev, unsigned int i) { destroy_workqueue(kbdev->as[i].pf_wq); } +void 
kbase_mmu_flush_pa_range(struct kbase_device *kbdev, struct kbase_context *kctx, + phys_addr_t phys, size_t size, + enum kbase_mmu_op_type flush_op) +{ +#if MALI_USE_CSF + unsigned long irq_flags; + + spin_lock_irqsave(&kbdev->hwaccess_lock, irq_flags); + if (mmu_flush_cache_on_gpu_ctrl(kbdev) && (flush_op != KBASE_MMU_OP_NONE) && + kbdev->pm.backend.gpu_powered && (!kctx || kctx->as_nr >= 0)) + mmu_flush_pa_range(kbdev, phys, size, KBASE_MMU_OP_FLUSH_PT); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, irq_flags); +#endif +} + #ifdef CONFIG_MALI_VECTOR_DUMP static size_t kbasep_mmu_dump_level(struct kbase_context *kctx, phys_addr_t pgd, int level, char ** const buffer, size_t *size_left) @@ -2881,7 +3496,7 @@ static size_t kbasep_mmu_dump_level(struct kbase_context *kctx, phys_addr_t pgd, pgd_page = kmap(pfn_to_page(PFN_DOWN(pgd))); if (!pgd_page) { - dev_warn(kbdev->dev, "%s: kmap failure\n", __func__); + dev_warn(kbdev->dev, "%s: kmap failure", __func__); return 0; } @@ -3035,8 +3650,7 @@ void kbase_mmu_bus_fault_worker(struct work_struct *data) #ifdef CONFIG_MALI_ARBITER_SUPPORT /* check if we still have GPU */ if (unlikely(kbase_is_gpu_removed(kbdev))) { - dev_dbg(kbdev->dev, - "%s: GPU has been removed\n", __func__); + dev_dbg(kbdev->dev, "%s: GPU has been removed", __func__); release_ctx(kbdev, kctx); atomic_dec(&kbdev->faults_pending); return; diff --git a/drivers/gpu/arm/bifrost/mmu/mali_kbase_mmu.h b/drivers/gpu/arm/bifrost/mmu/mali_kbase_mmu.h index 848570f2a6dd..247a67c50da8 100644 --- a/drivers/gpu/arm/bifrost/mmu/mali_kbase_mmu.h +++ b/drivers/gpu/arm/bifrost/mmu/mali_kbase_mmu.h @@ -29,6 +29,7 @@ struct kbase_context; struct kbase_mmu_table; +struct kbase_va_region; /** * enum kbase_caller_mmu_sync_info - MMU-synchronous caller info. 
@@ -49,6 +50,26 @@ enum kbase_caller_mmu_sync_info { CALLER_MMU_ASYNC }; +/** + * enum kbase_mmu_op_type - enum for MMU operations + * @KBASE_MMU_OP_NONE: To help catch uninitialized struct + * @KBASE_MMU_OP_FIRST: The lower boundary of enum + * @KBASE_MMU_OP_LOCK: Lock memory region + * @KBASE_MMU_OP_UNLOCK: Unlock memory region + * @KBASE_MMU_OP_FLUSH_PT: Flush page table (CLN+INV L2 only) + * @KBASE_MMU_OP_FLUSH_MEM: Flush memory (CLN+INV L2+LSC) + * @KBASE_MMU_OP_COUNT: The upper boundary of enum + */ +enum kbase_mmu_op_type { + KBASE_MMU_OP_NONE = 0, /* Must be zero */ + KBASE_MMU_OP_FIRST, /* Must be the first non-zero op */ + KBASE_MMU_OP_LOCK = KBASE_MMU_OP_FIRST, + KBASE_MMU_OP_UNLOCK, + KBASE_MMU_OP_FLUSH_PT, + KBASE_MMU_OP_FLUSH_MEM, + KBASE_MMU_OP_COUNT /* Must be the last in enum */ +}; + /** * kbase_mmu_as_init() - Initialising GPU address space object. * @@ -60,7 +81,7 @@ enum kbase_caller_mmu_sync_info { * * Return: 0 on success and non-zero value on failure. */ -int kbase_mmu_as_init(struct kbase_device *kbdev, int i); +int kbase_mmu_as_init(struct kbase_device *kbdev, unsigned int i); /** * kbase_mmu_as_term() - Terminate address space object. @@ -71,7 +92,7 @@ int kbase_mmu_as_init(struct kbase_device *kbdev, int i); * This is called upon device termination to destroy * the address space object of the device. 
*/ -void kbase_mmu_as_term(struct kbase_device *kbdev, int i); +void kbase_mmu_as_term(struct kbase_device *kbdev, unsigned int i); /** * kbase_mmu_init - Initialise an object representing GPU page tables @@ -132,22 +153,92 @@ u64 kbase_mmu_create_ate(struct kbase_device *kbdev, int kbase_mmu_insert_pages_no_flush(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, const u64 start_vpfn, struct tagged_addr *phys, size_t nr, - unsigned long flags, int group_id, u64 *dirty_pgds); -int kbase_mmu_insert_pages(struct kbase_device *kbdev, - struct kbase_mmu_table *mmut, u64 vpfn, - struct tagged_addr *phys, size_t nr, - unsigned long flags, int as_nr, int group_id, - enum kbase_caller_mmu_sync_info mmu_sync_info); + unsigned long flags, int group_id, u64 *dirty_pgds, + struct kbase_va_region *reg, bool ignore_page_migration); +int kbase_mmu_insert_pages(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, u64 vpfn, + struct tagged_addr *phys, size_t nr, unsigned long flags, int as_nr, + int group_id, enum kbase_caller_mmu_sync_info mmu_sync_info, + struct kbase_va_region *reg, bool ignore_page_migration); int kbase_mmu_insert_single_page(struct kbase_context *kctx, u64 vpfn, struct tagged_addr phys, size_t nr, unsigned long flags, int group_id, enum kbase_caller_mmu_sync_info mmu_sync_info); int kbase_mmu_teardown_pages(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, u64 vpfn, - struct tagged_addr *phys, size_t nr, int as_nr); + struct tagged_addr *phys, size_t nr, int as_nr, + bool ignore_page_migration); int kbase_mmu_update_pages(struct kbase_context *kctx, u64 vpfn, struct tagged_addr *phys, size_t nr, unsigned long flags, int const group_id); +#if MALI_USE_CSF +/** + * kbase_mmu_update_csf_mcu_pages - Update MCU mappings with changes of phys and flags + * + * @kbdev: Pointer to kbase device. 
+ * @vpfn: Virtual PFN (Page Frame Number) of the first page to update + * @phys: Pointer to the array of tagged physical addresses of the physical + * pages that are pointed to by the page table entries (that need to + * be updated). + * @nr: Number of pages to update + * @flags: Flags + * @group_id: The physical memory group in which the page was allocated. + * Valid range is 0..(MEMORY_GROUP_MANAGER_NR_GROUPS-1). + * + * Return: 0 on success, otherwise an error code. + */ +int kbase_mmu_update_csf_mcu_pages(struct kbase_device *kbdev, u64 vpfn, struct tagged_addr *phys, + size_t nr, unsigned long flags, int const group_id); +#endif + +/** + * kbase_mmu_migrate_page - Migrate GPU mappings and content between memory pages + * + * @old_phys: Old physical page to be replaced. + * @new_phys: New physical page used to replace old physical page. + * @old_dma_addr: DMA address of the old page. + * @new_dma_addr: DMA address of the new page. + * @level: MMU page table level of the provided PGD. + * + * The page migration process is made of 2 big steps: + * + * 1) Copy the content of the old page to the new page. + * 2) Remap the virtual page, that is: replace either the ATE (if the old page + * was a regular page) or the PTE (if the old page was used as a PGD) in the + * MMU page table with the new page. + * + * During the process, the MMU region is locked to prevent GPU access to the + * virtual memory page that is being remapped. + * + * Before copying the content of the old page to the new page and while the + * MMU region is locked, a GPU cache flush is performed to make sure that + * pending GPU writes are finalized to the old page before copying. + * That is necessary because otherwise there's a risk that GPU writes might + * be finalized to the old page, and not new page, after migration. + * The MMU region is unlocked only at the end of the migration operation. + * + * Return: 0 on success, otherwise an error code. 
+ */ +int kbase_mmu_migrate_page(struct tagged_addr old_phys, struct tagged_addr new_phys, + dma_addr_t old_dma_addr, dma_addr_t new_dma_addr, int level); + +/** + * kbase_mmu_flush_pa_range() - Flush physical address range from the GPU caches + * + * @kbdev: Instance of GPU platform device, allocated from the probe method. + * @kctx: Pointer to kbase context, it can be NULL if the physical address + * range is not associated with User created context. + * @phys: Starting address of the physical range to start the operation on. + * @size: Number of bytes to work on. + * @flush_op: KBASE_MMU_OP_NONE skips the flush; any other op issues KBASE_MMU_OP_FLUSH_PT. + * + * Issue a cache flush physical range command. No flush is performed on non-CSF + * builds, if the GPU doesn't support FLUSH_PA_RANGE command or is not powered, + * or if @kctx is not NULL and has no JASID assigned to it. + * This function is basically a wrapper for kbase_gpu_cache_flush_pa_range_and_busy_wait(). + */ +void kbase_mmu_flush_pa_range(struct kbase_device *kbdev, struct kbase_context *kctx, + phys_addr_t phys, size_t size, + enum kbase_mmu_op_type flush_op); /** * kbase_mmu_bus_fault_interrupt - Process a bus fault interrupt.
diff --git a/drivers/gpu/arm/bifrost/mmu/mali_kbase_mmu_hw.h b/drivers/gpu/arm/bifrost/mmu/mali_kbase_mmu_hw.h index 09b3fa809bea..50d2ea5d07c8 100644 --- a/drivers/gpu/arm/bifrost/mmu/mali_kbase_mmu_hw.h +++ b/drivers/gpu/arm/bifrost/mmu/mali_kbase_mmu_hw.h @@ -54,26 +54,6 @@ enum kbase_mmu_fault_type { KBASE_MMU_FAULT_TYPE_BUS_UNEXPECTED }; -/** - * enum kbase_mmu_op_type - enum for MMU operations - * @KBASE_MMU_OP_NONE: To help catch uninitialized struct - * @KBASE_MMU_OP_FIRST: The lower boundary of enum - * @KBASE_MMU_OP_LOCK: Lock memory region - * @KBASE_MMU_OP_UNLOCK: Unlock memory region - * @KBASE_MMU_OP_FLUSH_PT: Flush page table (CLN+INV L2 only) - * @KBASE_MMU_OP_FLUSH_MEM: Flush memory (CLN+INV L2+LSC) - * @KBASE_MMU_OP_COUNT: The upper boundary of enum - */ -enum kbase_mmu_op_type { - KBASE_MMU_OP_NONE = 0, /* Must be zero */ - KBASE_MMU_OP_FIRST, /* Must be the first non-zero op */ - KBASE_MMU_OP_LOCK = KBASE_MMU_OP_FIRST, - KBASE_MMU_OP_UNLOCK, - KBASE_MMU_OP_FLUSH_PT, - KBASE_MMU_OP_FLUSH_MEM, - KBASE_MMU_OP_COUNT /* Must be the last in enum */ -}; - /** * struct kbase_mmu_hw_op_param - parameters for kbase_mmu_hw_do_* functions * @vpfn: MMU Virtual Page Frame Number to start the operation on. @@ -104,6 +84,22 @@ struct kbase_mmu_hw_op_param { void kbase_mmu_hw_configure(struct kbase_device *kbdev, struct kbase_as *as); +/** + * kbase_mmu_hw_do_lock - Issue LOCK command to the MMU and program + * the LOCKADDR register. + * + * @kbdev: Kbase device to issue the MMU operation on. + * @as: Address space to issue the MMU operation on. + * @op_param: Pointer to struct containing information about the MMU + * operation to perform. + * + * hwaccess_lock needs to be held when calling this function. + * + * Return: 0 if issuing the command was successful, otherwise an error code. 
+ */ +int kbase_mmu_hw_do_lock(struct kbase_device *kbdev, struct kbase_as *as, + const struct kbase_mmu_hw_op_param *op_param); + /** * kbase_mmu_hw_do_unlock_no_addr - Issue UNLOCK command to the MMU without * programming the LOCKADDR register and wait @@ -114,6 +110,9 @@ void kbase_mmu_hw_configure(struct kbase_device *kbdev, * @op_param: Pointer to struct containing information about the MMU * operation to perform. * + * This function should be called for GPU where GPU command is used to flush + * the cache(s) instead of MMU command. + * * Return: 0 if issuing the command was successful, otherwise an error code. */ int kbase_mmu_hw_do_unlock_no_addr(struct kbase_device *kbdev, struct kbase_as *as, @@ -145,7 +144,7 @@ int kbase_mmu_hw_do_unlock(struct kbase_device *kbdev, struct kbase_as *as, * GPUs where MMU command to flush the cache(s) is deprecated. * mmu_hw_mutex needs to be held when calling this function. * - * Return: Zero if the operation was successful, non-zero otherwise. + * Return: 0 if the operation was successful, non-zero otherwise. */ int kbase_mmu_hw_do_flush(struct kbase_device *kbdev, struct kbase_as *as, const struct kbase_mmu_hw_op_param *op_param); @@ -164,7 +163,7 @@ int kbase_mmu_hw_do_flush(struct kbase_device *kbdev, struct kbase_as *as, * Both mmu_hw_mutex and hwaccess_lock need to be held when calling this * function. * - * Return: Zero if the operation was successful, non-zero otherwise. + * Return: 0 if the operation was successful, non-zero otherwise. */ int kbase_mmu_hw_do_flush_locked(struct kbase_device *kbdev, struct kbase_as *as, const struct kbase_mmu_hw_op_param *op_param); @@ -181,7 +180,7 @@ int kbase_mmu_hw_do_flush_locked(struct kbase_device *kbdev, struct kbase_as *as * specified inside @op_param. GPU command is used to flush the cache(s) * instead of the MMU command. * - * Return: Zero if the operation was successful, non-zero otherwise. + * Return: 0 if the operation was successful, non-zero otherwise. 
*/ int kbase_mmu_hw_do_flush_on_gpu_ctrl(struct kbase_device *kbdev, struct kbase_as *as, const struct kbase_mmu_hw_op_param *op_param); diff --git a/drivers/gpu/arm/bifrost/mmu/mali_kbase_mmu_hw_direct.c b/drivers/gpu/arm/bifrost/mmu/mali_kbase_mmu_hw_direct.c index cc764b483f05..858d4bf6edcd 100644 --- a/drivers/gpu/arm/bifrost/mmu/mali_kbase_mmu_hw_direct.c +++ b/drivers/gpu/arm/bifrost/mmu/mali_kbase_mmu_hw_direct.c @@ -424,6 +424,14 @@ static int mmu_hw_do_lock(struct kbase_device *kbdev, struct kbase_as *as, return ret; } +int kbase_mmu_hw_do_lock(struct kbase_device *kbdev, struct kbase_as *as, + const struct kbase_mmu_hw_op_param *op_param) +{ + lockdep_assert_held(&kbdev->hwaccess_lock); + + return mmu_hw_do_lock(kbdev, as, op_param); +} + int kbase_mmu_hw_do_unlock_no_addr(struct kbase_device *kbdev, struct kbase_as *as, const struct kbase_mmu_hw_op_param *op_param) { diff --git a/drivers/gpu/arm/bifrost/tests/Mconfig b/drivers/gpu/arm/bifrost/tests/Mconfig deleted file mode 100644 index 67b38a28cf96..000000000000 --- a/drivers/gpu/arm/bifrost/tests/Mconfig +++ /dev/null @@ -1,73 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note -# -# (C) COPYRIGHT 2018-2022 ARM Limited. All rights reserved. -# -# This program is free software and is provided to you under the terms of the -# GNU General Public License version 2 as published by the Free Software -# Foundation, and any use by you of this program is subject to the terms -# of such GNU license. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program; if not, you can access it online at -# http://www.gnu.org/licenses/gpl-2.0.html. 
-# -# - -menuconfig MALI_KUTF - bool "Build Mali Kernel Unit Test Framework modules" - depends on MALI_BIFROST && MALI_BIFROST_DEBUG - default y if BACKEND_KERNEL && MALI_BIFROST_DEBUG - help - This option will build the Mali testing framework modules. - - Modules: - - kutf.ko - - kutf_test.ko - -config MALI_KUTF_IRQ_TEST - bool "Build Mali KUTF IRQ test module" - depends on MALI_KUTF - default y - help - This option will build the IRQ latency measurement test module. - It can determine the latency of the Mali GPU IRQ on your system. - - Modules: - - mali_kutf_irq_test.ko - -config MALI_KUTF_CLK_RATE_TRACE - bool "Build Mali KUTF Clock rate trace test module" - depends on MALI_KUTF - default y - help - This option will build the clock rate trace portal test module. - It can test the clocks integration into the platform and exercise some - basic trace test in the system. - - Modules: - - mali_kutf_clk_rate_trace_test_portal.ko - -config MALI_KUTF_MGM_INTEGRATION_TEST - bool "Build Mali KUTF MGM integration test module" - depends on MALI_KUTF - default y - help - This option will build the MGM integration test module. - It can test the implementation of PTE translation for specific - group ids. - - Modules: - - mali_kutf_mgm_integration_test.ko - - -# Enable MALI_BIFROST_DEBUG for KUTF modules support - -config UNIT_TEST_KERNEL_MODULES - bool - default y if UNIT_TEST_CODE && BACKEND_KERNEL - default n diff --git a/drivers/gpu/arm/bifrost/tests/include/kutf/kutf_helpers.h b/drivers/gpu/arm/bifrost/tests/include/kutf/kutf_helpers.h index c4c713c838cf..3f68efa4257d 100644 --- a/drivers/gpu/arm/bifrost/tests/include/kutf/kutf_helpers.h +++ b/drivers/gpu/arm/bifrost/tests/include/kutf/kutf_helpers.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2017, 2020-2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2017, 2020-2022 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -31,6 +31,7 @@ */ #include +#include /** * kutf_helper_pending_input() - Check any pending lines sent by user space @@ -81,4 +82,28 @@ int kutf_helper_input_enqueue(struct kutf_context *context, */ void kutf_helper_input_enqueue_end_of_data(struct kutf_context *context); +/** + * kutf_helper_ignore_dmesg() - Write message in dmesg to instruct parser + * to ignore errors, until the counterpart + * is written to dmesg to stop ignoring errors. + * @dev: Device pointer to write to dmesg using. + * + * This function writes "Start ignoring dmesg warnings" to dmesg, which + * the parser will read and not log any errors. Only to be used in cases where + * we expect an error to be produced in dmesg but that we do not want to be + * flagged as an error. + */ +void kutf_helper_ignore_dmesg(struct device *dev); + +/** + * kutf_helper_stop_ignoring_dmesg() - Write message in dmesg to instruct parser + * to stop ignoring errors. + * @dev: Device pointer to write to dmesg using. + * + * This function writes "Stop ignoring dmesg warnings" to dmesg, which + * the parser will read and continue to log any errors. Counterpart to + * kutf_helper_ignore_dmesg(). + */ +void kutf_helper_stop_ignoring_dmesg(struct device *dev); + #endif /* _KERNEL_UTF_HELPERS_H_ */ diff --git a/drivers/gpu/arm/bifrost/tests/kutf/kutf_helpers.c b/drivers/gpu/arm/bifrost/tests/kutf/kutf_helpers.c index d207d1c6e04f..42736195e071 100644 --- a/drivers/gpu/arm/bifrost/tests/kutf/kutf_helpers.c +++ b/drivers/gpu/arm/bifrost/tests/kutf/kutf_helpers.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2017, 2020-2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2017, 2020-2022 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -127,3 +127,15 @@ void kutf_helper_input_enqueue_end_of_data(struct kutf_context *context) { kutf_helper_input_enqueue(context, NULL, 0); } + +void kutf_helper_ignore_dmesg(struct device *dev) +{ + dev_info(dev, "KUTF: Start ignoring dmesg warnings\n"); +} +EXPORT_SYMBOL(kutf_helper_ignore_dmesg); + +void kutf_helper_stop_ignoring_dmesg(struct device *dev) +{ + dev_info(dev, "KUTF: Stop ignoring dmesg warnings\n"); +} +EXPORT_SYMBOL(kutf_helper_stop_ignoring_dmesg); diff --git a/drivers/gpu/arm/bifrost/tl/mali_kbase_timeline_io.c b/drivers/gpu/arm/bifrost/tl/mali_kbase_timeline_io.c index 644d69bc209d..359d06371aff 100644 --- a/drivers/gpu/arm/bifrost/tl/mali_kbase_timeline_io.c +++ b/drivers/gpu/arm/bifrost/tl/mali_kbase_timeline_io.c @@ -30,6 +30,11 @@ #include #include +/* Explicitly include epoll header for old kernels. Not required from 4.16. */ +#if KERNEL_VERSION(4, 16, 0) > LINUX_VERSION_CODE +#include +#endif + /* The timeline stream file operations functions. */ static ssize_t kbasep_timeline_io_read(struct file *filp, char __user *buffer, size_t size, loff_t *f_pos); diff --git a/drivers/gpu/arm/bifrost/tl/mali_kbase_tlstream.h b/drivers/gpu/arm/bifrost/tl/mali_kbase_tlstream.h index 6660cf5bc276..c1428495b11c 100644 --- a/drivers/gpu/arm/bifrost/tl/mali_kbase_tlstream.h +++ b/drivers/gpu/arm/bifrost/tl/mali_kbase_tlstream.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2015-2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2015-2022 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -27,17 +27,13 @@ #include /* The maximum size of a single packet used by timeline. 
*/ -#define PACKET_SIZE 4096 /* bytes */ +#define PACKET_SIZE 4096 /* bytes */ /* The number of packets used by one timeline stream. */ -#if defined(CONFIG_MALI_JOB_DUMP) || defined(CONFIG_MALI_VECTOR_DUMP) - #define PACKET_COUNT 64 -#else - #define PACKET_COUNT 32 -#endif +#define PACKET_COUNT 128 /* The maximum expected length of string in tracepoint descriptor. */ -#define STRLEN_MAX 64 /* bytes */ +#define STRLEN_MAX 64 /* bytes */ /** * struct kbase_tlstream - timeline stream structure diff --git a/drivers/gpu/arm/bifrost/tl/mali_kbase_tracepoints.c b/drivers/gpu/arm/bifrost/tl/mali_kbase_tracepoints.c index fd0d0c01adde..e8a74e9dafa6 100644 --- a/drivers/gpu/arm/bifrost/tl/mali_kbase_tracepoints.c +++ b/drivers/gpu/arm/bifrost/tl/mali_kbase_tracepoints.c @@ -84,6 +84,7 @@ enum tl_msg_id_obj { KBASE_TL_ATTRIB_ATOM_PRIORITIZED, KBASE_TL_ATTRIB_ATOM_JIT, KBASE_TL_KBASE_NEW_DEVICE, + KBASE_TL_KBASE_GPUCMDQUEUE_KICK, KBASE_TL_KBASE_DEVICE_PROGRAM_CSG, KBASE_TL_KBASE_DEVICE_DEPROGRAM_CSG, KBASE_TL_KBASE_DEVICE_HALT_CSG, @@ -352,6 +353,10 @@ enum tl_msg_id_obj { "New KBase Device", \ "@IIIIIII", \ "kbase_device_id,kbase_device_gpu_core_count,kbase_device_max_num_csgs,kbase_device_as_count,kbase_device_sb_entry_count,kbase_device_has_cross_stream_sync,kbase_device_supports_gpu_sleep") \ + TRACEPOINT_DESC(KBASE_TL_KBASE_GPUCMDQUEUE_KICK, \ + "Kernel receives a request to process new GPU queue instructions", \ + "@IL", \ + "kernel_ctx_id,buffer_gpu_addr") \ TRACEPOINT_DESC(KBASE_TL_KBASE_DEVICE_PROGRAM_CSG, \ "CSG is programmed to a slot", \ "@IIIII", \ @@ -2092,6 +2097,33 @@ void __kbase_tlstream_tl_kbase_new_device( kbase_tlstream_msgbuf_release(stream, acq_flags); } +void __kbase_tlstream_tl_kbase_gpucmdqueue_kick( + struct kbase_tlstream *stream, + u32 kernel_ctx_id, + u64 buffer_gpu_addr +) +{ + const u32 msg_id = KBASE_TL_KBASE_GPUCMDQUEUE_KICK; + const size_t msg_size = sizeof(msg_id) + sizeof(u64) + + sizeof(kernel_ctx_id) + + sizeof(buffer_gpu_addr) + ; + char 
*buffer; + unsigned long acq_flags; + size_t pos = 0; + + buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + + pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); + pos = kbasep_serialize_timestamp(buffer, pos); + pos = kbasep_serialize_bytes(buffer, + pos, &kernel_ctx_id, sizeof(kernel_ctx_id)); + pos = kbasep_serialize_bytes(buffer, + pos, &buffer_gpu_addr, sizeof(buffer_gpu_addr)); + + kbase_tlstream_msgbuf_release(stream, acq_flags); +} + void __kbase_tlstream_tl_kbase_device_program_csg( struct kbase_tlstream *stream, u32 kbase_device_id, diff --git a/drivers/gpu/arm/bifrost/tl/mali_kbase_tracepoints.h b/drivers/gpu/arm/bifrost/tl/mali_kbase_tracepoints.h index be0c62edecd7..c690a75fe22c 100644 --- a/drivers/gpu/arm/bifrost/tl/mali_kbase_tracepoints.h +++ b/drivers/gpu/arm/bifrost/tl/mali_kbase_tracepoints.h @@ -396,6 +396,12 @@ void __kbase_tlstream_tl_kbase_new_device( u32 kbase_device_supports_gpu_sleep ); +void __kbase_tlstream_tl_kbase_gpucmdqueue_kick( + struct kbase_tlstream *stream, + u32 kernel_ctx_id, + u64 buffer_gpu_addr +); + void __kbase_tlstream_tl_kbase_device_program_csg( struct kbase_tlstream *stream, u32 kbase_device_id, @@ -1981,6 +1987,37 @@ struct kbase_tlstream; do { } while (0) #endif /* MALI_USE_CSF */ +/** + * KBASE_TLSTREAM_TL_KBASE_GPUCMDQUEUE_KICK - Kernel receives a request to process new GPU queue instructions + * + * @kbdev: Kbase device + * @kernel_ctx_id: Unique ID for the KBase Context + * @buffer_gpu_addr: Address of the GPU queue's command buffer + */ +#if MALI_USE_CSF +#define KBASE_TLSTREAM_TL_KBASE_GPUCMDQUEUE_KICK( \ + kbdev, \ + kernel_ctx_id, \ + buffer_gpu_addr \ + ) \ + do { \ + int enabled = atomic_read(&kbdev->timeline_flags); \ + if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \ + __kbase_tlstream_tl_kbase_gpucmdqueue_kick( \ + __TL_DISPATCH_STREAM(kbdev, obj), \ + kernel_ctx_id, \ + buffer_gpu_addr \ + ); \ + } while (0) +#else +#define 
KBASE_TLSTREAM_TL_KBASE_GPUCMDQUEUE_KICK( \ + kbdev, \ + kernel_ctx_id, \ + buffer_gpu_addr \ + ) \ + do { } while (0) +#endif /* MALI_USE_CSF */ + /** * KBASE_TLSTREAM_TL_KBASE_DEVICE_PROGRAM_CSG - CSG is programmed to a slot * diff --git a/drivers/hwtracing/coresight/mali/Kbuild b/drivers/hwtracing/coresight/mali/Kbuild new file mode 100644 index 000000000000..4d7d665f5652 --- /dev/null +++ b/drivers/hwtracing/coresight/mali/Kbuild @@ -0,0 +1,65 @@ +# SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note +# +# (C) COPYRIGHT 2022 ARM Limited. All rights reserved. +# +# This program is free software and is provided to you under the terms of the +# GNU General Public License version 2 as published by the Free Software +# Foundation, and any use by you of this program is subject to the terms +# of such GNU license. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, you can access it online at +# http://www.gnu.org/licenses/gpl-2.0.html. +# +# + +# make $(src) as absolute path if it is not already, by prefixing $(srctree) +# This is to prevent any build issue due to wrong path. 
+src:=$(if $(patsubst /%,,$(src)),$(srctree)/$(src),$(src)) + +# +# ccflags +# +ccflags-y += \ + -I$(srctree)/include/linux \ + -I$(src) \ + -I$(srctree)/drivers/hwtracing/coresight/ \ + -I$(src)/../../../../include + +subdir-ccflags-y += $(ccflags-y) + +ifneq ($(CONFIG_CORESIGHT), n) +ifneq ($(CONFIG_CORESIGHT), ) + + +ifeq ($(CONFIG_CORESIGHT_MALI_SOURCES_ETM),y) + obj-m += coresight_mali_source_etm.o + coresight_mali_source_etm-y := \ + sources/etm/coresight_mali_source_etm_core.o \ + sources/coresight_mali_sources.o \ + coresight_mali_common.o +endif + +ifeq ($(CONFIG_CORESIGHT_MALI_SOURCES_ITM),y) + obj-m += coresight_mali_source_itm.o + coresight_mali_source_itm-y := \ + sources/itm/coresight_mali_source_itm_core.o \ + sources/coresight_mali_sources.o \ + coresight_mali_common.o +endif + +ifeq ($(CONFIG_CORESIGHT_MALI_SOURCES_ELA),y) + obj-m += coresight_mali_source_ela.o + coresight_mali_source_ela-y := \ + sources/ela/coresight_mali_source_ela_core.o \ + sources/coresight_mali_sources.o \ + coresight_mali_common.o +endif + +endif +endif diff --git a/drivers/hwtracing/coresight/mali/Kconfig b/drivers/hwtracing/coresight/mali/Kconfig new file mode 100644 index 000000000000..283e2b56b641 --- /dev/null +++ b/drivers/hwtracing/coresight/mali/Kconfig @@ -0,0 +1,47 @@ +# SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note +# +# (C) COPYRIGHT 2022 ARM Limited. All rights reserved. +# +# This program is free software and is provided to you under the terms of the +# GNU General Public License version 2 as published by the Free Software +# Foundation, and any use by you of this program is subject to the terms +# of such GNU license. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. 
+# +# You should have received a copy of the GNU General Public License +# along with this program; if not, you can access it online at +# http://www.gnu.org/licenses/gpl-2.0.html. +# +# + + +config CORESIGHT_MALI_SOURCES_ETM + depends on MALI_CORESIGHT && ARM64 + bool "Enable CoreSight Mali Sources ETM module" + default y + help + This option will build coresight ETM source driver, + that is used for configuring, enabling and disabling + the ETM component. + +config CORESIGHT_MALI_SOURCES_ITM + depends on MALI_CORESIGHT + bool "Enable CoreSight Mali Sources ITM module" + default y + help + This option will build coresight ITM source driver, + that is used for configuring, enabling and disabling + the ITM component. + +config CORESIGHT_MALI_SOURCES_ELA + depends on MALI_CORESIGHT + bool "Enable CoreSight Mali Sources ELA module" + default y + help + This option will build coresight ELA source driver, + that is used for configuring, enabling and disabling + the ELA component. diff --git a/drivers/hwtracing/coresight/mali/Makefile b/drivers/hwtracing/coresight/mali/Makefile new file mode 100644 index 000000000000..a6b5622c92ae --- /dev/null +++ b/drivers/hwtracing/coresight/mali/Makefile @@ -0,0 +1,101 @@ +# SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note +# +# (C) COPYRIGHT 2022 ARM Limited. All rights reserved. +# +# This program is free software and is provided to you under the terms of the +# GNU General Public License version 2 as published by the Free Software +# Foundation, and any use by you of this program is subject to the terms +# of such GNU license. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. 
+# +# You should have received a copy of the GNU General Public License +# along with this program; if not, you can access it online at +# http://www.gnu.org/licenses/gpl-2.0.html. +# +# + +KERNEL_SRC ?= /lib/modules/$(shell uname -r)/build +KDIR ?= $(KERNEL_SRC) + +ifeq ($(KDIR),) + $(error Must specify KDIR to point to the kernel to target) +endif + +CONFIG_MALI_CORESIGHT ?= n +ifeq ($(CONFIG_MALI_CORESIGHT),y) + + ifeq ($(CONFIG_ARM64), y) + CONFIG_CORESIGHT_MALI_SOURCES_ETM ?= y + endif + + CONFIG_CORESIGHT_MALI_SOURCES_ITM ?= y + CONFIG_CORESIGHT_MALI_SOURCES_ELA ?= y +endif + +CONFIGS := \ + CONFIG_MALI_CORESIGHT \ + CONFIG_CORESIGHT_MALI_SOURCES_ETM \ + CONFIG_CORESIGHT_MALI_SOURCES_ITM \ + CONFIG_CORESIGHT_MALI_SOURCES_ELA + + +# +# MAKE_ARGS to pass the custom CONFIGs on out-of-tree build +# +# Generate the list of CONFIGs and values. +# $(value config) is the name of the CONFIG option. +# $(value $(value config)) is its value (y, m). +# When the CONFIG is not set to y or m, it defaults to n. +MAKE_ARGS := $(foreach config,$(CONFIGS), \ + $(if $(filter y m,$(value $(value config))), \ + $(value config)=$(value $(value config)), \ + $(value config)=n)) + +# +# EXTRA_CFLAGS to define the custom CONFIGs on out-of-tree build +# +# Generate the list of CONFIGs defines with values from CONFIGS. +# $(value config) is the name of the CONFIG option. +# When set to y or m, the CONFIG gets defined to 1.
+EXTRA_CFLAGS := $(foreach config,$(CONFIGS), \ + $(if $(filter y m,$(value $(value config))), \ + -D$(value config)=1)) + +# +# KBUILD_EXTRA_SYMBOLS to prevent warnings about unknown functions +# +EXTRA_SYMBOLS += $(CURDIR)/../../../../../drivers/gpu/arm/midgard/Module.symvers +EXTRA_SYMBOLS += $(CURDIR)/../../../../../drivers/hwtracing/coresight/mali/Module.symvers + +# The following were added to align with W=1 in scripts/Makefile.extrawarn +# from the Linux source tree +KBUILD_CFLAGS += -Wall -Werror +KBUILD_CFLAGS += -Wextra -Wunused -Wno-unused-parameter +KBUILD_CFLAGS += -Wmissing-declarations +KBUILD_CFLAGS += -Wmissing-format-attribute +KBUILD_CFLAGS += -Wmissing-prototypes +KBUILD_CFLAGS += -Wold-style-definition +KBUILD_CFLAGS += -Wmissing-include-dirs +KBUILD_CFLAGS += $(call cc-option, -Wunused-but-set-variable) +KBUILD_CFLAGS += $(call cc-option, -Wunused-const-variable) +KBUILD_CFLAGS += $(call cc-option, -Wpacked-not-aligned) +KBUILD_CFLAGS += $(call cc-option, -Wstringop-truncation) +# The following turn off the warnings enabled by -Wextra +KBUILD_CFLAGS += -Wno-missing-field-initializers +KBUILD_CFLAGS += -Wno-sign-compare +KBUILD_CFLAGS += -Wno-type-limits + +KBUILD_CPPFLAGS += -DKBUILD_EXTRA_WARN1 + +all: + $(MAKE) -C $(KDIR) M=$(CURDIR) $(MAKE_ARGS) EXTRA_CFLAGS="$(EXTRA_CFLAGS)" KBUILD_EXTRA_SYMBOLS="$(EXTRA_SYMBOLS)" modules + +modules_install: + $(MAKE) -C $(KDIR) M=$(CURDIR) $(MAKE_ARGS) modules_install + +clean: + $(MAKE) -C $(KDIR) M=$(CURDIR) $(MAKE_ARGS) clean diff --git a/drivers/hwtracing/coresight/mali/build.bp b/drivers/hwtracing/coresight/mali/build.bp new file mode 100644 index 000000000000..824ae54c9e43 --- /dev/null +++ b/drivers/hwtracing/coresight/mali/build.bp @@ -0,0 +1,100 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * + * (C) COPYRIGHT 2022 ARM Limited. All rights reserved. 
+ * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + */ + +bob_defaults { + name: "coresight_mali_defaults", + srcs: [ + "Makefile", + "Kbuild", + "coresight_mali_common.c", + "coresight_mali_common.h", + ], +} + +bob_defaults { + name: "coresight_mali_source_defaults", + srcs: [ + "sources/coresight_mali_sources.c", + "sources/coresight_mali_sources.h", + ], +} + + +bob_kernel_module { + name: "coresight_mali_source_etm", + defaults: [ + "kernel_defaults", + "coresight_mali_defaults", + "coresight_mali_source_defaults", + ], + srcs: [ + "sources/etm/coresight_mali_source_etm_core.c", + ], + extra_symbols: [ + "mali_kbase", + ], + enabled: false, + mali_coresight: { + kbuild_options: ["CONFIG_CORESIGHT_MALI_SOURCES_ETM=y"], + enabled: true, + }, +} + +bob_kernel_module { + name: "coresight_mali_source_itm", + defaults: [ + "kernel_defaults", + "coresight_mali_defaults", + "coresight_mali_source_defaults", + ], + srcs: [ + "sources/itm/coresight_mali_source_itm_core.c", + ], + extra_symbols: [ + "mali_kbase", + ], + enabled: false, + mali_coresight: { + kbuild_options: ["CONFIG_CORESIGHT_MALI_SOURCES_ITM=y"], + enabled: true, + }, +} + +bob_kernel_module { + name: "coresight_mali_source_ela", + defaults: [ + "kernel_defaults", + "coresight_mali_defaults", + "coresight_mali_source_defaults", + ], + srcs: [ + 
"sources/ela/coresight_mali_source_ela_core.c", + "sources/ela/coresight-ela600.h" + ], + extra_symbols: [ + "mali_kbase", + ], + enabled: false, + mali_coresight: { + kbuild_options: ["CONFIG_CORESIGHT_MALI_SOURCES_ELA=y"], + enabled: true, + }, +} diff --git a/drivers/hwtracing/coresight/mali/coresight_mali_common.c b/drivers/hwtracing/coresight/mali/coresight_mali_common.c new file mode 100644 index 000000000000..8e3af76c5267 --- /dev/null +++ b/drivers/hwtracing/coresight/mali/coresight_mali_common.c @@ -0,0 +1,62 @@ +// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note +/* + * + * (C) COPYRIGHT 2022 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. 
+ *
+ */
+
+#include
+#include
+
+#include
+#include "coresight_mali_common.h"
+
+/*
+ * Switch a Mali CoreSight component on.
+ *
+ * Only CS_MODE_SYSFS is accepted; any other mode is rejected with -EINVAL
+ * before the driver state is touched. The requested mode is recorded in the
+ * driver data first and rolled back to CS_MODE_DISABLED when
+ * kbase_debug_coresight_csf_config_enable() reports an error; that error code
+ * is then handed back to the caller.
+ */
+int coresight_mali_enable_component(struct coresight_device *csdev, u32 mode)
+{
+	struct coresight_mali_drvdata *data = dev_get_drvdata(csdev->dev.parent);
+	int err;
+
+	if (mode != CS_MODE_SYSFS) {
+		dev_err(data->dev, "Unsupported Mali CS_MODE: %d, expected: %d\n", mode,
+			CS_MODE_SYSFS);
+		return -EINVAL;
+	}
+
+	data->mode = mode;
+
+	err = kbase_debug_coresight_csf_config_enable(data->config);
+	if (!err)
+		return 0;
+
+	dev_err(data->dev, "Config failed to enable with error code %d\n", err);
+	data->mode = CS_MODE_DISABLED;
+
+	return err;
+}
+
+/*
+ * Switch a Mali CoreSight component off.
+ *
+ * The result of kbase_debug_coresight_csf_config_disable() is logged on
+ * failure and returned unchanged.
+ */
+int coresight_mali_disable_component(struct coresight_device *csdev)
+{
+	struct coresight_mali_drvdata *data = dev_get_drvdata(csdev->dev.parent);
+	int err = kbase_debug_coresight_csf_config_disable(data->config);
+
+	if (err)
+		dev_err(data->dev, "config failed to disable with error code %d\n", err);
+
+	/* The component is marked disabled even when the disable call failed. */
+	data->mode = CS_MODE_DISABLED;
+
+	return err;
+}
diff --git a/drivers/hwtracing/coresight/mali/coresight_mali_common.h b/drivers/hwtracing/coresight/mali/coresight_mali_common.h
new file mode 100644
index 000000000000..43154c1f639d
--- /dev/null
+++ b/drivers/hwtracing/coresight/mali/coresight_mali_common.h
@@ -0,0 +1,133 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ *
+ * (C) COPYRIGHT 2022 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU license.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + */ + +#ifndef _CORESIGHT_MALI_COMMON_H +#define _CORESIGHT_MALI_COMMON_H + +#include +#include + +/* Macros for CoreSight OP types. */ +#define WRITE_IMM_OP(_reg_addr, _val) \ + { \ + .type = KBASE_DEBUG_CORESIGHT_CSF_OP_TYPE_WRITE_IMM, \ + .op.write_imm.reg_addr = _reg_addr, .op.write_imm.val = _val \ + } + +#define WRITE_RANGE_OP(_reg_start, _reg_end, _val) \ + { \ + .type = KBASE_DEBUG_CORESIGHT_CSF_OP_TYPE_WRITE_IMM_RANGE, \ + .op.write_imm_range.reg_start = _reg_start, \ + .op.write_imm_range.reg_end = _reg_end, .op.write_imm_range.val = _val \ + } + +#define WRITE_PTR_OP(_reg_addr, _ptr) \ + { \ + .type = KBASE_DEBUG_CORESIGHT_CSF_OP_TYPE_WRITE, .op.write.reg_addr = _reg_addr, \ + .op.write.ptr = _ptr \ + } + +#define READ_OP(_reg_addr, _ptr) \ + { \ + .type = KBASE_DEBUG_CORESIGHT_CSF_OP_TYPE_READ, .op.read.reg_addr = _reg_addr, \ + .op.read.ptr = _ptr \ + } + +#define POLL_OP(_reg_addr, _mask, _val) \ + { \ + .type = KBASE_DEBUG_CORESIGHT_CSF_OP_TYPE_POLL, .op.poll.reg_addr = _reg_addr, \ + .op.poll.mask = _mask, .op.poll.val = _val \ + } + +#define BIT_OR_OP(_ptr, _val) \ + { \ + .type = KBASE_DEBUG_CORESIGHT_CSF_OP_TYPE_BIT_OR, .op.bitw.ptr = _ptr, \ + .op.bitw.val = _val \ + } + +#define BIT_XOR_OP(_ptr, _val) \ + { \ + .type = KBASE_DEBUG_CORESIGHT_CSF_OP_TYPE_BIT_XOR, .op.bitw.ptr = _ptr, \ + .op.bitw.val = _val \ + } + +#define BIT_AND_OP(_ptr, _val) \ + { \ + .type = KBASE_DEBUG_CORESIGHT_CSF_OP_TYPE_BIT_AND, .op.bitw.ptr = _ptr, \ + .op.bitw.val = _val \ + } + +#define BIT_NOT_OP(_ptr) \ + { \ + .type = KBASE_DEBUG_CORESIGHT_CSF_OP_TYPE_BIT_NOT, .op.bitw.ptr = _ptr, \ + } + +#ifndef CS_MALI_UNLOCK_COMPONENT +/** + * CS_MALI_UNLOCK_COMPONENT - A write of 0xC5ACCE55 enables write access to the block + */ +#define CS_MALI_UNLOCK_COMPONENT 0xC5ACCE55 +#endif + +/** 
+ * struct coresight_mali_drvdata - Coresight mali driver data
+ *
+ * @csdev:        Coresight device pointer
+ * @dev:          Device pointer
+ * @kbase_client: Pointer to coresight mali client
+ * @config:       Pointer to coresight mali config, used for enabling and
+ *                disabling the coresight component
+ * @enable_seq:   Enable sequence needed to enable coresight block
+ * @disable_seq:  Disable sequence needed to disable coresight block
+ * @gpu_dev:      Pointer to gpu device structure
+ * @mode:         Mode in which the driver operates
+ */
+struct coresight_mali_drvdata {
+	struct coresight_device *csdev;
+	struct device *dev;
+	void *kbase_client;
+	void *config;
+	struct kbase_debug_coresight_csf_sequence enable_seq;
+	struct kbase_debug_coresight_csf_sequence disable_seq;
+	void *gpu_dev;
+	u32 mode;
+};
+
+/**
+ * coresight_mali_enable_component - Generic enable for a coresight block
+ *
+ * @csdev: Coresight device to be enabled
+ * @mode:  Mode in which the block should start operating in
+ *
+ * The implementation only accepts CS_MODE_SYSFS and rejects every other mode
+ * with -EINVAL. When enabling the config fails, the stored mode is reset to
+ * CS_MODE_DISABLED and the error code of the enable call is returned.
+ *
+ * Return: 0 if success. Error code on failure.
+ */
+int coresight_mali_enable_component(struct coresight_device *csdev, u32 mode);
+
+/**
+ * coresight_mali_disable_component - Generic disable for a coresight block
+ *
+ * @csdev: Coresight device to be disabled
+ *
+ * The stored mode is set to CS_MODE_DISABLED whether or not disabling the
+ * config succeeded.
+ *
+ * Return: 0 if success. Error code on failure.
+ */
+int coresight_mali_disable_component(struct coresight_device *csdev);
+
+#endif /* _CORESIGHT_MALI_COMMON_H */
diff --git a/drivers/hwtracing/coresight/mali/sources/coresight_mali_sources.c b/drivers/hwtracing/coresight/mali/sources/coresight_mali_sources.c
new file mode 100644
index 000000000000..e6d2dc71096b
--- /dev/null
+++ b/drivers/hwtracing/coresight/mali/sources/coresight_mali_sources.c
@@ -0,0 +1,168 @@
+// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
+/*
+ *
+ * (C) COPYRIGHT 2022 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU license.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ */
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include
+#include
+#include "sources/coresight_mali_sources.h"
+
+/* Report the trace ID stored in the source driver data. */
+static int coresight_mali_source_trace_id(struct coresight_device *csdev)
+{
+	struct coresight_mali_source_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent);
+
+	return drvdata->trcid;
+}
+
+/* Source enable hook: @event is unused, the work is done by the common helper. */
+static int coresight_mali_enable_source(struct coresight_device *csdev, struct perf_event *event,
+					u32 mode)
+{
+	return coresight_mali_enable_component(csdev, mode);
+}
+
+/*
+ * Source disable hook: @event is unused. The hook returns void, so the result
+ * of the common helper is dropped here.
+ */
+static void coresight_mali_disable_source(struct coresight_device *csdev, struct perf_event *event)
+{
+	coresight_mali_disable_component(csdev);
+}
+
+static const struct coresight_ops_source coresight_mali_source_ops = {
+	.trace_id = coresight_mali_source_trace_id,
+	.enable = coresight_mali_enable_source,
+	.disable = coresight_mali_disable_source
+};
+
+static const struct coresight_ops mali_cs_ops = {
+	.source_ops = &coresight_mali_source_ops,
+};
+
+/*
+ * Generic probe for a Mali CoreSight source.
+ *
+ * Allocates the driver data, fetches the CoreSight platform data, resolves the
+ * GPU device through the "gpu" phandle, runs the source specific init hook and
+ * registers the CoreSight device.
+ *
+ * Return: 0 on success. -ENOMEM on allocation failure, -ENODEV when the GPU
+ * node, GPU platform device or GPU driver data cannot be found, otherwise the
+ * error reported by the failing step.
+ */
+int coresight_mali_sources_probe(struct platform_device *pdev)
+{
+	int ret = 0;
+	struct coresight_platform_data *pdata = NULL;
+	struct coresight_mali_source_drvdata *drvdata = NULL;
+	struct coresight_desc desc = { 0 };
+	struct device *dev = &pdev->dev;
+	struct device_node *np = dev->of_node;
+	struct platform_device *gpu_pdev = NULL;
+	struct device_node *gpu_node = NULL;
+
+	drvdata = devm_kzalloc(dev, sizeof(*drvdata), GFP_KERNEL);
+	if (!drvdata)
+		return -ENOMEM;
+
+	dev_set_drvdata(dev, drvdata);
+	drvdata->base.dev = dev;
+
+#if KERNEL_VERSION(5, 3, 0) <= LINUX_VERSION_CODE
+	pdata = coresight_get_platform_data(dev);
+#else
+	if (np)
+		pdata = of_get_coresight_platform_data(dev, np);
+#endif
+	if (IS_ERR(pdata)) {
+		dev_err(drvdata->base.dev, "Failed to get platform data\n");
+		ret = PTR_ERR(pdata);
+		goto devm_kfree_drvdata;
+	}
+
+	dev->platform_data = pdata;
+
+	/*
+	 * Every lookup failure below must set an error code: leaving ret at 0
+	 * would report a successful probe after the driver data was freed.
+	 */
+	gpu_node = of_parse_phandle(np, "gpu", 0);
+	if (!gpu_node) {
+		dev_err(drvdata->base.dev, "GPU node not available\n");
+		ret = -ENODEV;
+		goto devm_kfree_drvdata;
+	}
+	gpu_pdev = of_find_device_by_node(gpu_node);
+	/* The node reference taken by of_parse_phandle() is only needed for the lookup. */
+	of_node_put(gpu_node);
+	if (gpu_pdev == NULL) {
+		dev_err(drvdata->base.dev, "Couldn't find GPU device from node\n");
+		ret = -ENODEV;
+		goto devm_kfree_drvdata;
+	}
+
+	drvdata->base.gpu_dev = platform_get_drvdata(gpu_pdev);
+	/* Drop the device reference taken by of_find_device_by_node(). */
+	put_device(&gpu_pdev->dev);
+	if (!drvdata->base.gpu_dev) {
+		dev_err(drvdata->base.dev, "GPU dev not available\n");
+		ret = -ENODEV;
+		goto devm_kfree_drvdata;
+	}
+
+	ret = coresight_mali_sources_init_drvdata(drvdata);
+	if (ret) {
+		dev_err(drvdata->base.dev, "Failed to init source driver data\n");
+		goto deinit_drvdata;
+	}
+
+	desc.type = CORESIGHT_DEV_TYPE_SOURCE;
+	desc.subtype.source_subtype = CORESIGHT_DEV_SUBTYPE_SOURCE_SOFTWARE;
+	desc.ops = &mali_cs_ops;
+	desc.pdata = pdata;
+	desc.dev = dev;
+	desc.groups = coresight_mali_source_groups_get();
+
+	/* From here on the init hook has run, so failures must undo it. */
+#if KERNEL_VERSION(5, 3, 0) <= LINUX_VERSION_CODE
+	desc.name = devm_kasprintf(dev, GFP_KERNEL, "%s", drvdata->type_name);
+	if (!desc.name) {
+		ret = -ENOMEM;
+		goto deinit_drvdata;
+	}
+#endif
+	drvdata->base.csdev = coresight_register(&desc);
+	if (IS_ERR(drvdata->base.csdev)) {
+		dev_err(drvdata->base.dev, "Failed to register coresight device\n");
+		ret = PTR_ERR(drvdata->base.csdev);
+		/* Do not leave an ERR_PTR behind where a device pointer is expected. */
+		drvdata->base.csdev = NULL;
+		goto deinit_drvdata;
+	}
+
+	return ret;
+
+deinit_drvdata:
+	/* No CoreSight device is registered on any path that reaches this label. */
+	coresight_mali_sources_deinit_drvdata(drvdata);
+
+devm_kfree_drvdata:
+	devm_kfree(dev, drvdata);
+
+	return ret;
+}
+
+/*
+ * Generic remove for a Mali CoreSight source: unregisters the CoreSight device
+ * if one was registered, runs the source specific deinit hook and frees the
+ * driver data. Always returns 0.
+ */
+int coresight_mali_sources_remove(struct platform_device *pdev)
+{
+	struct coresight_mali_source_drvdata *drvdata = dev_get_drvdata(&pdev->dev);
+
+	if (drvdata->base.csdev != NULL)
+		coresight_unregister(drvdata->base.csdev);
+
+	coresight_mali_sources_deinit_drvdata(drvdata);
+
+	devm_kfree(&pdev->dev, drvdata);
+
+	return 0;
+}
+
+MODULE_AUTHOR("ARM Ltd.");
+MODULE_DESCRIPTION("Arm Coresight Mali source");
+MODULE_LICENSE("GPL");
diff --git a/drivers/hwtracing/coresight/mali/sources/coresight_mali_sources.h b/drivers/hwtracing/coresight/mali/sources/coresight_mali_sources.h
new file mode 100644
index 000000000000..0915dd107488
--- /dev/null
+++ b/drivers/hwtracing/coresight/mali/sources/coresight_mali_sources.h
@@ -0,0 +1,94 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ *
+ * (C) COPYRIGHT 2022 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU license.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ * + */ + +#ifndef _CORESIGHT_MALI_SOURCES_H +#define _CORESIGHT_MALI_SOURCES_H + +#include +#include +#include + +#include "coresight_mali_common.h" + +/** + * struct coresight_mali_source_drvdata - Coresight mali source driver data + * + * @base: Common driver data structure between coresight mali sources and sinks + * @trcid: Trace id + * @type_name: Type name of the driver, for example "itm" or "etm" + */ +struct coresight_mali_source_drvdata { + struct coresight_mali_drvdata base; + u32 trcid; +#if KERNEL_VERSION(5, 3, 0) <= LINUX_VERSION_CODE + char *type_name; +#endif +}; + +/** + * coresight_mali_sources_probe - Generic probe for a coresight mali source + * + * @pdev: Pointer to a platform device + * + * Return: 0 if success. Error code on failure. + */ +int coresight_mali_sources_probe(struct platform_device *pdev); + +/** + * coresight_mali_sources_remove - Generic remove for a coresight mali source + * + * @pdev: Pointer to a platform device + * + * Return: 0 if success. Error code on failure. + */ +int coresight_mali_sources_remove(struct platform_device *pdev); + +/** + * coresight_mali_sources_init_drvdata - Driver data initialization hook. + * + * @drvdata: Driver data structure to initialize + * + * Used for initializing source specific enable and disable sequences and other relevant data. + * + * Return: 0 if success. Error code on failure. + */ +int coresight_mali_sources_init_drvdata(struct coresight_mali_source_drvdata *drvdata); + +/** + * coresight_mali_sources_deinit_drvdata - Driver data deinitialization hook. + * + * @drvdata: Driver data structure to deinitialize + * + * Used for releasing source specific enable and disable sequences and other relevant data. + */ +void coresight_mali_sources_deinit_drvdata(struct coresight_mali_source_drvdata *drvdata); + +/** + * coresight_mali_source_groups_get - Getter for source groups. + * + * Return: a pointer to an array of attribute groups of the driver. Can also be NULL. 
+ * + * Groups are drivers sysfs subnodes that can be used to read state of the coresight component + * or write component configuration. + */ +const struct attribute_group **coresight_mali_source_groups_get(void); + +#endif /* _CORESIGHT_MALI_SOURCES_H */ diff --git a/drivers/hwtracing/coresight/mali/sources/ela/coresight-ela600.h b/drivers/hwtracing/coresight/mali/sources/ela/coresight-ela600.h new file mode 100644 index 000000000000..0ee96bc2f047 --- /dev/null +++ b/drivers/hwtracing/coresight/mali/sources/ela/coresight-ela600.h @@ -0,0 +1,129 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * + * (C) COPYRIGHT 2022 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. 
+ * + */ + +#ifndef _CORESIGHT_ELA600_H +#define _CORESIGHT_ELA600_H + +#include + +#define ELA_CTRL 0x000 +#define ELA_TIMECTRL 0x004 +#define ELA_TSSR 0x008 +#define ELA_ATBCTRL 0x00C +#define ELA_PTACTION 0x010 +#define ELA_AUXCTRL 0x014 +#define ELA_CNTSEL 0x018 + +#define ELA_CTSR 0x020 +#define ELA_CCVR 0x024 +#define ELA_CAVR 0x028 +#define ELA_RDCAPTID 0x02C +#define ELA_RDCAPTIDEXT 0x030 + +#define ELA_RRAR 0x040 +#define ELA_RRDR 0x044 +#define ELA_RWAR 0x048 +#define ELA_RWDR 0x04C + +#define ELA_SIGSEL(x) (0x100 + 0x100 * (x)) +#define ELA_TRIGCTRL(x) (ELA_SIGSEL(x) + 0x004) +#define ELA_NEXTSTATE(x) (ELA_SIGSEL(x) + 0x008) +#define ELA_ACTION(x) (ELA_SIGSEL(x) + 0x00C) +#define ELA_ALTNEXTSTATE(x) (ELA_SIGSEL(x) + 0x010) +#define ELA_ALTACTION(x) (ELA_SIGSEL(x) + 0x014) +#define ELA_COMPCTRL(x) (ELA_SIGSEL(x) + 0x018) +#define ELA_ALTCOMPCTRL(x) (ELA_SIGSEL(x) + 0x01C) +#define ELA_COUNTCOMP(x) (ELA_SIGSEL(x) + 0x020) +#define ELA_TWBSEL(x) (ELA_SIGSEL(x) + 0x028) +#define ELA_EXTMASK(x) (ELA_SIGSEL(x) + 0x030) +#define ELA_EXTCOMP(x) (ELA_SIGSEL(x) + 0x034) +#define ELA_QUALMASK(x) (ELA_SIGSEL(x) + 0x038) +#define ELA_QUALCOMP(x) (ELA_SIGSEL(x) + 0x03C) +#define ELA_SIGMASK(x, y) (ELA_SIGSEL(x) + 0x040 + 4 * (y)) +#define ELA_SIGCOMP(x, y) (ELA_SIGSEL(x) + 0x080 + 4 * (y)) + +#define ELA_ITTRIGOUT 0xEE8 +#define ELA_ITATBDATA 0xEEC +#define ELA_ITATBCTR1 0xEF0 +#define ELA_ITATBCTR0 0xEF4 +#define ELA_ITTRIGIN 0xEF8 +#define ELA_ITCTRL 0xF00 + +#define ELA_AUTHSTATUS 0xFB8 + +#define ELA_DEVARCH 0xFBC +#define ELA_DEVID2 0xFC0 +#define ELA_DEVID1 0xFC4 +#define ELA_DEVID 0xFC8 +#define ELA_DEVTYPE 0xFCC + +#define ELA_PIDR4 0xFD0 +#define ELA_PIDR5 0xFD4 +#define ELA_PIDR6 0xFD8 +#define ELA_PIDR7 0xFDC +#define ELA_PIDR0 0xFE0 +#define ELA_PIDR1 0xFE4 +#define ELA_PIDR2 0xFE8 +#define ELA_PIDR3 0xFEC +#define ELA_CIDR0 0xFF0 +#define ELA_CIDR1 0xFF4 +#define ELA_CIDR2 0xFF8 +#define ELA_CIDR3 0xFFC + +/* REGISTER MASKS */ +#define ELA_CTRL_RUN BIT(0) 
+#define ELA_CTRL_TRACE_BUSY BIT(1)
+
+/*
+ * Multi-bit fields are described with the kernel's GENMASK(high, low) helper,
+ * which lives next to BIT(). The earlier spelling GEN_MASK has no definition
+ * visible in this patch, so the first use of such a mask would not build.
+ */
+#define ELA_TIMECTRL_TSEN BIT(16)
+#define ELA_TIMECTRL_TSINT GENMASK(15, 12)
+#define ELA_TIMECTRL_TCSEL1 GENMASK(7, 4)
+#define ELA_TIMECTRL_TCSEL0 GENMASK(3, 0)
+
+#define ELA_ATBCTRL_PREDICT BIT(31)
+#define ELA_ATBCTRL_ATID_TRIG_EN BIT(15)
+#define ELA_ATBCTRL_ATID_VALUE GENMASK(14, 8)
+#define ELA_ATBCTRL_ASYNC_INTERVAL GENMASK(7, 0)
+
+#define ELA_ACTION_ELAOUTPUT GENMASK(7, 4)
+#define ELA_ACTION_TRACE BIT(3)
+#define ELA_ACTION_STOPCLOCK BIT(2)
+#define ELA_ACTION_CTTRIGOUT GENMASK(1, 0)
+
+#define ELA_AUXCTRL_FLUSH_DIS BIT(0)
+
+#define ELA_SIGSEL_JCN_REQUEST BIT(0)
+#define ELA_SIGSEL_JCN_RESPONSE BIT(1)
+#define ELA_SIGSEL_CEU_EXECUTION BIT(2)
+#define ELA_SIGSEL_MCU_AHBP BIT(3)
+#define ELA_SIGSEL_HOST_AXI BIT(4)
+
+#define ELA_TRIGCTRL_ALTCOMPSEL BIT(15)
+#define ELA_TRIGCTRL_ALTCOMP GENMASK(14, 12)
+#define ELA_TRIGCTRL_CAPTID GENMASK(11, 10)
+#define ELA_TRIGCTRL_COUNTBRK BIT(9)
+#define ELA_TRIGCTRL_COUNTCLR BIT(8)
+#define ELA_TRIGCTRL_TRACE GENMASK(7, 6)
+#define ELA_TRIGCTRL_COUNTSRC BIT(5)
+#define ELA_TRIGCTRL_WATCHRST BIT(4)
+#define ELA_TRIGCTRL_COMPSEL BIT(3)
+#define ELA_TRIGCTRL_COMP GENMASK(2, 0)
+
+#endif /* _CORESIGHT_ELA600_H */
diff --git a/drivers/hwtracing/coresight/mali/sources/ela/coresight_mali_source_ela_core.c b/drivers/hwtracing/coresight/mali/sources/ela/coresight_mali_source_ela_core.c
new file mode 100644
index 000000000000..0da37a75ecfb
--- /dev/null
+++ b/drivers/hwtracing/coresight/mali/sources/ela/coresight_mali_source_ela_core.c
@@ -0,0 +1,666 @@
+// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
+/*
+ *
+ * (C) COPYRIGHT 2022 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU license.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ */
+
+#include
+#include
+#include "sources/coresight_mali_sources.h"
+#include "coresight-ela600.h"
+
+/* Linux Coresight framework does not support multiple sources enabled
+ * at the same time.
+ *
+ * To avoid Kernel instability, all Mali Coresight sources use the
+ * same trace ID value as the mandatory ETM one.
+ */
+#define CS_MALI_TRACE_ID 0x00000010
+
+#define CS_ELA_BASE_ADDR 0xE0043000
+#define CS_GPU_COMMAND_ADDR 0x40003030
+#define CS_GPU_COMMAND_TRACE_CONTROL_EN 0x000001DC
+#define CS_ELA_MAX_SIGNAL_GROUPS 12U
+#define CS_SG_NAME_MAX_LEN 10U
+/* Number of values handled by one sigmask/sigcomp sysfs attribute. */
+#define CS_ELA_NR_SIG_REGS 8U
+
+/* Element count of a true array (not valid for pointers). */
+#define NELEMS(s) (sizeof(s) / sizeof((s)[0]))
+
+/*
+ * Generate the sysfs show/store pair and the device attribute named _a for the
+ * eight consecutive shadow registers CS_ELA_<_b>_0 .. CS_ELA_<_b>_7.
+ */
+#define CS_ELA_SIGREGS_ATTR_RW(_a, _b) \
+	static ssize_t _a##_show(struct device *dev, struct device_attribute *attr, \
+				 char *const buf) \
+	{ \
+		return sprintf_regs(buf, CS_ELA_##_b##_0, CS_ELA_##_b##_7); \
+	} \
+	static ssize_t _a##_store(struct device *dev, struct device_attribute *attr, \
+				  const char *buf, size_t count) \
+	{ \
+		return verify_store_8_regs(dev, buf, count, CS_ELA_##_b##_0); \
+	} \
+	static DEVICE_ATTR_RW(_a)
+
+/*
+ * Indices into cs_ela_state.regs. The order matters: the code below clears
+ * whole index ranges with memset() (for example SIGMASK0_0 .. SIGCOMP0_7 and
+ * SIGSEL4 .. SIGCOMP4_7), so the entries of such a range must stay contiguous.
+ */
+enum cs_ela_dynamic_regs {
+	CS_ELA_TIMECTRL,
+	CS_ELA_TSSR,
+
+	CS_ELA_SIGSEL0,
+	CS_ELA_COMPCTRL0,
+	CS_ELA_ALTCOMPCTRL0,
+	CS_ELA_TWBSEL0,
+	CS_ELA_QUALMASK0,
+	CS_ELA_QUALCOMP0,
+	CS_ELA_SIGMASK0_0,
+	CS_ELA_SIGMASK0_1,
+	CS_ELA_SIGMASK0_2,
+	CS_ELA_SIGMASK0_3,
+	CS_ELA_SIGMASK0_4,
+	CS_ELA_SIGMASK0_5,
+	CS_ELA_SIGMASK0_6,
+	CS_ELA_SIGMASK0_7,
+	CS_ELA_SIGCOMP0_0,
+	CS_ELA_SIGCOMP0_1,
+	CS_ELA_SIGCOMP0_2,
+	CS_ELA_SIGCOMP0_3,
+	CS_ELA_SIGCOMP0_4,
+	CS_ELA_SIGCOMP0_5,
+	CS_ELA_SIGCOMP0_6,
+	CS_ELA_SIGCOMP0_7,
+
+	CS_ELA_SIGSEL4,
+	CS_ELA_NEXTSTATE4,
+	CS_ELA_ACTION4,
+	CS_ELA_ALTNEXTSTATE4,
+	CS_ELA_COMPCTRL4,
+	CS_ELA_TWBSEL4,
+	CS_ELA_SIGMASK4_0,
+	CS_ELA_SIGMASK4_1,
+	CS_ELA_SIGMASK4_2,
+	CS_ELA_SIGMASK4_3,
+	CS_ELA_SIGMASK4_4,
+	CS_ELA_SIGMASK4_5,
+	CS_ELA_SIGMASK4_6,
+	CS_ELA_SIGMASK4_7,
+	CS_ELA_SIGCOMP4_0,
+	CS_ELA_SIGCOMP4_1,
+	CS_ELA_SIGCOMP4_2,
+	CS_ELA_SIGCOMP4_3,
+	CS_ELA_SIGCOMP4_4,
+	CS_ELA_SIGCOMP4_5,
+	CS_ELA_SIGCOMP4_6,
+	CS_ELA_SIGCOMP4_7,
+
+	CS_ELA_NR_DYN_REGS
+};
+
+/*
+ * Selectable trace modes. A value doubles as the index into tracemode_names[]
+ * and as the bit position tested in cs_ela_state.supported_tracemodes.
+ */
+enum cs_ela_tracemodes {
+	CS_ELA_TRACEMODE_NONE,
+	CS_ELA_TRACEMODE_JCN,
+	CS_ELA_TRACEMODE_CEU_EXEC,
+	CS_ELA_TRACEMODE_CEU_CMDS,
+	CS_ELA_TRACEMODE_MCU_AHBP,
+	CS_ELA_TRACEMODE_HOST_AXI,
+	CS_ELA_NR_TRACEMODE
+};
+
+/* Signal types; used as index into cs_ela_state.signal_types[] and the tables below. */
+enum cs_ela_signal_types {
+	CS_ELA_SIGTYPE_JCN_REQ,
+	CS_ELA_SIGTYPE_JCN_RES,
+	CS_ELA_SIGTYPE_CEU_EXEC,
+	CS_ELA_SIGTYPE_CEU_CMDS,
+	CS_ELA_SIGTYPE_MCU_AHBP,
+	CS_ELA_SIGTYPE_HOST_AXI,
+	CS_ELA_NR_SIGTYPE,
+};
+
+/*
+ * Driver-wide ELA configuration state.
+ *
+ * tracemode:            trace mode last applied by setup_tracemode_registers()
+ * supported_tracemodes: bitmask, bit N set means trace mode N may be selected
+ * enabled:              the sysfs store handlers refuse changes while this is 1
+ *                       (where it is set is not visible in this part of the
+ *                       file - TODO confirm)
+ * signal_types:         per signal type value that gets copied into the SIGSEL
+ *                       shadow registers
+ * regs:                 shadow values of the dynamic registers, indexed by
+ *                       enum cs_ela_dynamic_regs
+ */
+struct cs_ela_state {
+	enum cs_ela_tracemodes tracemode;
+	u32 supported_tracemodes;
+	int enabled;
+	u32 signal_types[CS_ELA_NR_SIGTYPE];
+	u32 regs[CS_ELA_NR_DYN_REGS];
+};
+
+#if KERNEL_VERSION(5, 3, 0) <= LINUX_VERSION_CODE
+static char *type_name = "mali-source-ela";
+#endif
+
+static struct cs_ela_state ela_state = { 0 };
+
+/* Setup ELA sysfs attributes */
+/* Names shown and accepted by the "select" attribute, indexed by trace mode. */
+static char *tracemode_names[] = {
+	[CS_ELA_TRACEMODE_NONE] = "NONE", [CS_ELA_TRACEMODE_JCN] = "JCN",
+	[CS_ELA_TRACEMODE_CEU_EXEC] = "CEU_EXEC", [CS_ELA_TRACEMODE_CEU_CMDS] = "CEU_CMDS",
+	[CS_ELA_TRACEMODE_MCU_AHBP] = "MCU_AHBP", [CS_ELA_TRACEMODE_HOST_AXI] = "HOST_AXI",
+};
+
+/* Signal type names, indexed by enum cs_ela_signal_types. */
+static char *signal_type_names[] = {
+	[CS_ELA_SIGTYPE_JCN_REQ] = "jcn-request", [CS_ELA_SIGTYPE_JCN_RES] = "jcn-response",
+	[CS_ELA_SIGTYPE_CEU_EXEC] = "ceu-execution", [CS_ELA_SIGTYPE_CEU_CMDS] = "ceu-commands",
+	[CS_ELA_SIGTYPE_MCU_AHBP] = "mcu-ahbp", [CS_ELA_SIGTYPE_HOST_AXI] = "host-axi",
+};
+
+/* Trace mode that each signal type belongs to; both JCN signal types map to JCN. */
+static int signal_type_tracemode_map[] = {
+	[CS_ELA_SIGTYPE_JCN_REQ] = CS_ELA_TRACEMODE_JCN,
+	[CS_ELA_SIGTYPE_JCN_RES] = CS_ELA_TRACEMODE_JCN,
+	[CS_ELA_SIGTYPE_CEU_EXEC] = CS_ELA_TRACEMODE_CEU_EXEC,
+	[CS_ELA_SIGTYPE_CEU_CMDS] = CS_ELA_TRACEMODE_CEU_CMDS,
+	[CS_ELA_SIGTYPE_MCU_AHBP] = CS_ELA_TRACEMODE_MCU_AHBP,
+	[CS_ELA_SIGTYPE_HOST_AXI] = CS_ELA_TRACEMODE_HOST_AXI,
+};
+
+/*
+ * Load the shadow registers in ela_state.regs with the preset for @tracemode
+ * and record @tracemode as the current mode.
+ *
+ * Only the shadow copy is changed here. A value that matches none of the
+ * cases leaves the registers untouched but is still stored as current mode.
+ */
+static void setup_tracemode_registers(int tracemode)
+{
+	switch (tracemode) {
+	case CS_ELA_TRACEMODE_NONE:
+		/* Perform full reset of all dynamic registers */
+		memset(ela_state.regs, 0x00000000, sizeof(u32) * CS_ELA_NR_DYN_REGS);
+
+		ela_state.tracemode = CS_ELA_TRACEMODE_NONE;
+		break;
+	case CS_ELA_TRACEMODE_JCN:
+
+		/*
+		 * Request and response sharing one signal value are handled
+		 * with trigger state 0 only; otherwise the response signal is
+		 * programmed into the state 4 registers as well.
+		 */
+		if (ela_state.signal_types[CS_ELA_SIGTYPE_JCN_REQ] ==
+		    ela_state.signal_types[CS_ELA_SIGTYPE_JCN_RES]) {
+			ela_state.regs[CS_ELA_TSSR] = 0x00000000;
+
+			ela_state.regs[CS_ELA_SIGSEL0] =
+				ela_state.signal_types[CS_ELA_SIGTYPE_JCN_REQ];
+
+			ela_state.regs[CS_ELA_COMPCTRL0] = 0x00000010;
+			ela_state.regs[CS_ELA_ALTCOMPCTRL0] = 0x00001000;
+			ela_state.regs[CS_ELA_TWBSEL0] = 0x0000FFFF;
+			ela_state.regs[CS_ELA_QUALMASK0] = 0x00000000;
+			ela_state.regs[CS_ELA_QUALCOMP0] = 0x00000000;
+
+			/* Clears SIGMASK0_0 .. SIGCOMP0_7 in one go. */
+			memset(&ela_state.regs[CS_ELA_SIGMASK0_0], 0x00000000,
+			       sizeof(u32) * (CS_ELA_SIGCOMP0_7 - CS_ELA_SIGMASK0_0 + 1));
+			ela_state.regs[CS_ELA_SIGMASK0_1] = 0x80000000;
+			ela_state.regs[CS_ELA_SIGMASK0_3] = 0x80000000;
+			ela_state.regs[CS_ELA_SIGCOMP0_1] = 0x80000000;
+			ela_state.regs[CS_ELA_SIGCOMP0_3] = 0x80000000;
+
+			/* Clears every state 4 shadow register (SIGSEL4 .. SIGCOMP4_7). */
+			memset(&ela_state.regs[CS_ELA_SIGSEL4], 0x00000000,
+			       sizeof(u32) * (CS_ELA_SIGCOMP4_7 - CS_ELA_SIGSEL4 + 1));
+
+			ela_state.regs[CS_ELA_COMPCTRL4] = 0x11111111;
+
+		} else {
+			ela_state.regs[CS_ELA_TSSR] = 0x00000010;
+
+			ela_state.regs[CS_ELA_SIGSEL0] =
+				ela_state.signal_types[CS_ELA_SIGTYPE_JCN_REQ];
+
+			ela_state.regs[CS_ELA_COMPCTRL0] = 0x00000100;
+			ela_state.regs[CS_ELA_ALTCOMPCTRL0] = 0x11111111;
+			ela_state.regs[CS_ELA_TWBSEL0] = 0x00000FFF;
+			ela_state.regs[CS_ELA_QUALMASK0] = 0x00000000;
+			ela_state.regs[CS_ELA_QUALCOMP0] = 0x00000000;
+
+			/* The |= below operate on values that were just zeroed. */
+			memset(&ela_state.regs[CS_ELA_SIGMASK0_0], 0x00000000,
+			       sizeof(u32) * (CS_ELA_SIGCOMP0_7 - CS_ELA_SIGMASK0_0 + 1));
+			ela_state.regs[CS_ELA_SIGMASK0_2] |= 0x80000000;
+			ela_state.regs[CS_ELA_SIGCOMP0_2] |= 0x80000000;
+
+			ela_state.regs[CS_ELA_SIGSEL4] =
+				ela_state.signal_types[CS_ELA_SIGTYPE_JCN_RES];
+			ela_state.regs[CS_ELA_NEXTSTATE4] = 0x00000010;
+			ela_state.regs[CS_ELA_ACTION4] = 0x00000008;
+			ela_state.regs[CS_ELA_ALTNEXTSTATE4] = 0x00000001;
+			ela_state.regs[CS_ELA_COMPCTRL4] = 0x00000100;
+			ela_state.regs[CS_ELA_TWBSEL4] = 0x00000FFF;
+
+			memset(&ela_state.regs[CS_ELA_SIGMASK4_0], 0x00000000,
+			       sizeof(u32) * (CS_ELA_SIGCOMP4_7 - CS_ELA_SIGMASK4_0 + 1));
+			ela_state.regs[CS_ELA_SIGMASK4_2] |= 0x80000000;
+			ela_state.regs[CS_ELA_SIGCOMP4_2] |= 0x80000000;
+		}
+
+		break;
+	case CS_ELA_TRACEMODE_CEU_EXEC:
+	case CS_ELA_TRACEMODE_CEU_CMDS:
+		/* Both CEU modes share one preset apart from SIGSEL0 and ALTCOMPCTRL0. */
+		ela_state.regs[CS_ELA_TSSR] = 0x00000000;
+
+		if (tracemode == CS_ELA_TRACEMODE_CEU_EXEC) {
+			ela_state.regs[CS_ELA_SIGSEL0] =
+				ela_state.signal_types[CS_ELA_SIGTYPE_CEU_EXEC];
+			ela_state.regs[CS_ELA_ALTCOMPCTRL0] = 0x00001000;
+		} else if (tracemode == CS_ELA_TRACEMODE_CEU_CMDS) {
+			ela_state.regs[CS_ELA_SIGSEL0] =
+				ela_state.signal_types[CS_ELA_SIGTYPE_CEU_CMDS];
+			ela_state.regs[CS_ELA_ALTCOMPCTRL0] = 0x11111111;
+		}
+
+		ela_state.regs[CS_ELA_COMPCTRL0] = 0x00000001;
+		ela_state.regs[CS_ELA_TWBSEL0] = 0x0000FFFF;
+		ela_state.regs[CS_ELA_QUALMASK0] = 0x0000000F;
+		ela_state.regs[CS_ELA_QUALCOMP0] = 0x0000000F;
+
+		memset(&ela_state.regs[CS_ELA_SIGMASK0_0], 0x00000000,
+		       sizeof(u32) * (CS_ELA_SIGCOMP0_7 - CS_ELA_SIGMASK0_0 + 1));
+
+		memset(&ela_state.regs[CS_ELA_SIGSEL4], 0x00000000,
+		       sizeof(u32) * (CS_ELA_SIGCOMP4_7 - CS_ELA_SIGSEL4 + 1));
+
+		ela_state.regs[CS_ELA_COMPCTRL4] = 0x11111111;
+
+		break;
+	case CS_ELA_TRACEMODE_MCU_AHBP:
+	case CS_ELA_TRACEMODE_HOST_AXI:
+		/* Both bus modes share one preset apart from SIGSEL0. */
+		ela_state.regs[CS_ELA_TSSR] = 0x00000000;
+
+		if (tracemode == CS_ELA_TRACEMODE_MCU_AHBP)
+			ela_state.regs[CS_ELA_SIGSEL0] =
+				ela_state.signal_types[CS_ELA_SIGTYPE_MCU_AHBP];
+		else if (tracemode == CS_ELA_TRACEMODE_HOST_AXI)
+			ela_state.regs[CS_ELA_SIGSEL0] =
+				ela_state.signal_types[CS_ELA_SIGTYPE_HOST_AXI];
+
+		ela_state.regs[CS_ELA_COMPCTRL0] = 0x00000001;
+		ela_state.regs[CS_ELA_ALTCOMPCTRL0] = 0x11111111;
+		ela_state.regs[CS_ELA_TWBSEL0] = 0x000000FF;
+		ela_state.regs[CS_ELA_QUALMASK0] = 0x00000003;
+		ela_state.regs[CS_ELA_QUALCOMP0] = 0x00000003;
+
+		memset(&ela_state.regs[CS_ELA_SIGMASK0_0], 0x00000000,
+		       sizeof(u32) * (CS_ELA_SIGCOMP0_7 - CS_ELA_SIGMASK0_0 + 1));
+
+		memset(&ela_state.regs[CS_ELA_SIGSEL4], 0x00000000,
+		       sizeof(u32) * (CS_ELA_SIGCOMP4_7 - CS_ELA_SIGSEL4 + 1));
+
+		ela_state.regs[CS_ELA_COMPCTRL4] = 0x11111111;
+
+		break;
+	}
+	ela_state.tracemode = tracemode;
+}
+
+/*
+ * sysfs "select" read: prints one supported trace mode name per line and
+ * wraps the currently selected one in square brackets.
+ */
+static ssize_t select_show(struct device *dev, struct device_attribute *attr, char *const buf)
+{
+	ssize_t ret = 0;
+	unsigned int mode;
+
+	for (mode = CS_ELA_TRACEMODE_NONE; mode < CS_ELA_NR_TRACEMODE; mode++) {
+		if (ela_state.supported_tracemodes & (1U << mode)) {
+			if (ela_state.tracemode == mode)
+				ret += sprintf(buf + ret, "[%s]\n", tracemode_names[mode]);
+			else
+				ret += sprintf(buf + ret, "%s\n", tracemode_names[mode]);
+		}
+	}
+	return ret;
+}
+
+/*
+ * sysfs "select" write: applies the register preset of the named trace mode.
+ *
+ * Returns @count on success and -EINVAL when the configuration is currently
+ * enabled or when @buf does not name a supported trace mode.
+ */
+static ssize_t select_store(struct device *dev, struct device_attribute *attr, const char *buf,
+			    size_t count)
+{
+	struct coresight_mali_source_drvdata *drvdata = dev_get_drvdata(dev->parent);
+	unsigned int mode = 0;
+
+	/* Check if enabled and return error */
+	if (ela_state.enabled == 1) {
+		dev_err(drvdata->base.dev,
+			"Config needs to be disabled before modifying registers");
+		return -EINVAL;
+	}
+
+	/* sysfs_streq() tolerates the trailing newline of an "echo MODE > select". */
+	for (mode = CS_ELA_TRACEMODE_NONE; mode < CS_ELA_NR_TRACEMODE; mode++) {
+		if (sysfs_streq(tracemode_names[mode], buf) &&
+		    (ela_state.supported_tracemodes & (1U << mode))) {
+			setup_tracemode_registers(mode);
+			return count;
+		}
+	}
+
+	dev_err(drvdata->base.dev, "Invalid tracemode: %s", buf);
+	return -EINVAL;
+}
+
+static DEVICE_ATTR_RW(select);
+
+/* sysfs "is_enabled" read: prints ela_state.enabled as a decimal number. */
+static ssize_t is_enabled_show(struct device *dev, struct device_attribute *attr, char *const buf)
+{
+	return sprintf(buf, "%d\n", ela_state.enabled);
+}
+
+static DEVICE_ATTR_RO(is_enabled);
+
+/*
+ * Print the shadow registers from_reg .. to_reg (inclusive) into @buf as
+ * space separated "0x%08X" values followed by a newline.
+ *
+ * Return: number of characters written.
+ */
+static ssize_t sprintf_regs(char *const buf, int from_reg, int to_reg)
+{
+	ssize_t ret = 0;
+	unsigned int i = 0;
+
+	for (i = from_reg; i <= to_reg; i++)
+		ret += sprintf(buf + ret, "0x%08X ", ela_state.regs[i]);
+
+	ret += sprintf(buf + ret, "\n");
+	return ret;
+}
+
+/*
+ * Parse exactly CS_ELA_NR_SIG_REGS hexadecimal values from @buf and store them
+ * in the shadow registers starting at index @from_reg.
+ *
+ * The values are parsed into a local array first, so the shadow registers are
+ * only touched once the whole input has been accepted.
+ *
+ * Return: @count on success, -EINVAL when the configuration is currently
+ * enabled or when @buf does not hold exactly eight values.
+ */
+static ssize_t verify_store_8_regs(struct device *dev, const char *buf, size_t count, int from_reg)
+{
+	struct coresight_mali_source_drvdata *drvdata = dev_get_drvdata(dev->parent);
+	u32 regs[CS_ELA_NR_SIG_REGS] = { 0 };
+	int items;
+	unsigned int i;
+
+	if (ela_state.enabled == 1) {
+		dev_err(drvdata->base.dev,
+			"Config needs to be disabled before modifying registers");
+		return -EINVAL;
+	}
+
+	items = sscanf(buf, "%x %x %x %x %x %x %x %x", &regs[0], &regs[1], &regs[2], &regs[3],
+		       &regs[4], &regs[5], &regs[6], &regs[7]);
+	if (items <= 0) {
+		dev_err(drvdata->base.dev, "Invalid register value");
+		return -EINVAL;
+	}
+	if (items != CS_ELA_NR_SIG_REGS) {
+		dev_err(drvdata->base.dev, "Incorrect number of registers set (%d != %d)", items,
+			CS_ELA_NR_SIG_REGS);
+		return -EINVAL;
+	}
+	for (i = 0; i < CS_ELA_NR_SIG_REGS; i++)
+		ela_state.regs[from_reg + i] = regs[i];
+
+	return count;
+}
+
+/* One read/write attribute per block of eight signal mask / compare registers. */
+CS_ELA_SIGREGS_ATTR_RW(sigmask0, SIGMASK0);
+CS_ELA_SIGREGS_ATTR_RW(sigcomp0, SIGCOMP0);
+CS_ELA_SIGREGS_ATTR_RW(sigmask4, SIGMASK4);
+CS_ELA_SIGREGS_ATTR_RW(sigcomp4, SIGCOMP4);
+
+static struct attribute *coresight_ela_attrs[] = {
+	&dev_attr_select.attr,
+	&dev_attr_is_enabled.attr,
+	&dev_attr_sigmask0.attr,
+	&dev_attr_sigcomp0.attr,
+	&dev_attr_sigmask4.attr,
+	&dev_attr_sigcomp4.attr,
+	NULL,
+};
+
+static struct attribute_group coresight_ela_group = {
+	.attrs = coresight_ela_attrs,
+};
+
+static const struct attribute_group *coresight_ela_groups[] = {
+	&coresight_ela_group,
+	NULL,
+};
+
+const struct attribute_group **coresight_mali_source_groups_get(void) +{ + return coresight_ela_groups; +} + +/* Initialize ELA coresight driver */ + +static struct kbase_debug_coresight_csf_address_range ela_range[] = { + { CS_ELA_BASE_ADDR, CS_ELA_BASE_ADDR + CORESIGHT_DEVTYPE }, + { CS_GPU_COMMAND_ADDR, CS_GPU_COMMAND_ADDR } +}; + +static struct kbase_debug_coresight_csf_op ela_enable_ops[] = { + /* Clearing CTRL.RUN and the read only CTRL.TRACE_BUSY. */ + WRITE_IMM_OP(CS_ELA_BASE_ADDR + ELA_CTRL, 0x00000000), + /* Poll CTRL.TRACE_BUSY until it becomes low to ensure that trace has stopped. */ + POLL_OP(CS_ELA_BASE_ADDR + ELA_CTRL, ELA_CTRL_TRACE_BUSY, 0x0), + /* 0 for now. TSEN = 1 or TSINT = 8 in future */ + WRITE_PTR_OP(CS_ELA_BASE_ADDR + ELA_TIMECTRL, &ela_state.regs[CS_ELA_TIMECTRL]), + WRITE_PTR_OP(CS_ELA_BASE_ADDR + ELA_TSSR, &ela_state.regs[CS_ELA_TSSR]), + /* ATID[6:0] = 4; valid range 0x1-0x6F, value must be unique and needs to be + * known for trace extraction + */ + WRITE_IMM_OP(CS_ELA_BASE_ADDR + ELA_ATBCTRL, 0x00000400), + WRITE_IMM_OP(CS_ELA_BASE_ADDR + ELA_PTACTION, ELA_ACTION_TRACE), + WRITE_IMM_OP(CS_ELA_BASE_ADDR + ELA_AUXCTRL, 0x00000000), + WRITE_IMM_OP(CS_ELA_BASE_ADDR + ELA_CNTSEL, 0x00000000), + + /* Trigger State 0 */ + WRITE_PTR_OP(CS_ELA_BASE_ADDR + ELA_SIGSEL(0), &ela_state.regs[CS_ELA_SIGSEL0]), + /* May need to be configurable in future. 
*/ + WRITE_IMM_OP(CS_ELA_BASE_ADDR + ELA_TRIGCTRL(0), 0x00000000), + + WRITE_IMM_OP(CS_ELA_BASE_ADDR + ELA_NEXTSTATE(0), 0x00000001), + WRITE_IMM_OP(CS_ELA_BASE_ADDR + ELA_ACTION(0), ELA_ACTION_TRACE), + + WRITE_IMM_OP(CS_ELA_BASE_ADDR + ELA_ALTNEXTSTATE(0), 0x00000001), + WRITE_IMM_OP(CS_ELA_BASE_ADDR + ELA_ALTACTION(0), ELA_ACTION_TRACE), + + WRITE_PTR_OP(CS_ELA_BASE_ADDR + ELA_COMPCTRL(0), &ela_state.regs[CS_ELA_COMPCTRL0]), + WRITE_PTR_OP(CS_ELA_BASE_ADDR + ELA_ALTCOMPCTRL(0), &ela_state.regs[CS_ELA_ALTCOMPCTRL0]), + WRITE_IMM_OP(CS_ELA_BASE_ADDR + ELA_COUNTCOMP(0), 0x00000000), + + WRITE_PTR_OP(CS_ELA_BASE_ADDR + ELA_TWBSEL(0), &ela_state.regs[CS_ELA_TWBSEL0]), + WRITE_IMM_OP(CS_ELA_BASE_ADDR + ELA_EXTMASK(0), 0x00000000), + WRITE_IMM_OP(CS_ELA_BASE_ADDR + ELA_EXTCOMP(0), 0x00000000), + WRITE_PTR_OP(CS_ELA_BASE_ADDR + ELA_QUALMASK(0), &ela_state.regs[CS_ELA_QUALMASK0]), + WRITE_PTR_OP(CS_ELA_BASE_ADDR + ELA_QUALCOMP(0), &ela_state.regs[CS_ELA_QUALCOMP0]), + + WRITE_PTR_OP(CS_ELA_BASE_ADDR + ELA_SIGMASK(0, 0), &ela_state.regs[CS_ELA_SIGMASK0_0]), + WRITE_PTR_OP(CS_ELA_BASE_ADDR + ELA_SIGMASK(0, 1), &ela_state.regs[CS_ELA_SIGMASK0_1]), + WRITE_PTR_OP(CS_ELA_BASE_ADDR + ELA_SIGMASK(0, 2), &ela_state.regs[CS_ELA_SIGMASK0_2]), + WRITE_PTR_OP(CS_ELA_BASE_ADDR + ELA_SIGMASK(0, 3), &ela_state.regs[CS_ELA_SIGMASK0_3]), + WRITE_PTR_OP(CS_ELA_BASE_ADDR + ELA_SIGMASK(0, 4), &ela_state.regs[CS_ELA_SIGMASK0_4]), + WRITE_PTR_OP(CS_ELA_BASE_ADDR + ELA_SIGMASK(0, 5), &ela_state.regs[CS_ELA_SIGMASK0_5]), + WRITE_PTR_OP(CS_ELA_BASE_ADDR + ELA_SIGMASK(0, 6), &ela_state.regs[CS_ELA_SIGMASK0_6]), + WRITE_PTR_OP(CS_ELA_BASE_ADDR + ELA_SIGMASK(0, 7), &ela_state.regs[CS_ELA_SIGMASK0_7]), + + WRITE_PTR_OP(CS_ELA_BASE_ADDR + ELA_SIGCOMP(0, 0), &ela_state.regs[CS_ELA_SIGCOMP0_0]), + WRITE_PTR_OP(CS_ELA_BASE_ADDR + ELA_SIGCOMP(0, 1), &ela_state.regs[CS_ELA_SIGCOMP0_1]), + WRITE_PTR_OP(CS_ELA_BASE_ADDR + ELA_SIGCOMP(0, 2), &ela_state.regs[CS_ELA_SIGCOMP0_2]), + 
WRITE_PTR_OP(CS_ELA_BASE_ADDR + ELA_SIGCOMP(0, 3), &ela_state.regs[CS_ELA_SIGCOMP0_3]), + WRITE_PTR_OP(CS_ELA_BASE_ADDR + ELA_SIGCOMP(0, 4), &ela_state.regs[CS_ELA_SIGCOMP0_4]), + WRITE_PTR_OP(CS_ELA_BASE_ADDR + ELA_SIGCOMP(0, 5), &ela_state.regs[CS_ELA_SIGCOMP0_5]), + WRITE_PTR_OP(CS_ELA_BASE_ADDR + ELA_SIGCOMP(0, 6), &ela_state.regs[CS_ELA_SIGCOMP0_6]), + WRITE_PTR_OP(CS_ELA_BASE_ADDR + ELA_SIGCOMP(0, 7), &ela_state.regs[CS_ELA_SIGCOMP0_7]), + + WRITE_RANGE_OP(CS_ELA_BASE_ADDR + ELA_SIGSEL(1), CS_ELA_BASE_ADDR + ELA_SIGCOMP(1, 7), + 0x00000000), + WRITE_RANGE_OP(CS_ELA_BASE_ADDR + ELA_SIGSEL(2), CS_ELA_BASE_ADDR + ELA_SIGCOMP(2, 7), + 0x00000000), + WRITE_RANGE_OP(CS_ELA_BASE_ADDR + ELA_SIGSEL(3), CS_ELA_BASE_ADDR + ELA_SIGCOMP(3, 7), + 0x00000000), + + WRITE_IMM_OP(CS_ELA_BASE_ADDR + ELA_COMPCTRL(1), 0x11111111), + WRITE_IMM_OP(CS_ELA_BASE_ADDR + ELA_COMPCTRL(2), 0x11111111), + WRITE_IMM_OP(CS_ELA_BASE_ADDR + ELA_COMPCTRL(3), 0x11111111), + + WRITE_IMM_OP(CS_ELA_BASE_ADDR + ELA_ALTCOMPCTRL(1), 0x11111111), + WRITE_IMM_OP(CS_ELA_BASE_ADDR + ELA_ALTCOMPCTRL(2), 0x11111111), + WRITE_IMM_OP(CS_ELA_BASE_ADDR + ELA_ALTCOMPCTRL(3), 0x11111111), + + /* Trigger State 4 */ + WRITE_PTR_OP(CS_ELA_BASE_ADDR + ELA_SIGSEL(4), &ela_state.regs[CS_ELA_SIGSEL4]), + WRITE_IMM_OP(CS_ELA_BASE_ADDR + ELA_TRIGCTRL(4), 0x00000000), + + WRITE_PTR_OP(CS_ELA_BASE_ADDR + ELA_NEXTSTATE(4), &ela_state.regs[CS_ELA_NEXTSTATE4]), + WRITE_PTR_OP(CS_ELA_BASE_ADDR + ELA_ACTION(4), &ela_state.regs[CS_ELA_ACTION4]), + WRITE_PTR_OP(CS_ELA_BASE_ADDR + ELA_ALTNEXTSTATE(4), &ela_state.regs[CS_ELA_ALTNEXTSTATE4]), + WRITE_IMM_OP(CS_ELA_BASE_ADDR + ELA_ALTACTION(4), ELA_ACTION_TRACE), + + WRITE_PTR_OP(CS_ELA_BASE_ADDR + ELA_COMPCTRL(4), &ela_state.regs[CS_ELA_COMPCTRL4]), + WRITE_IMM_OP(CS_ELA_BASE_ADDR + ELA_ALTCOMPCTRL(4), 0x11111111), + WRITE_IMM_OP(CS_ELA_BASE_ADDR + ELA_COUNTCOMP(4), 0x00000000), + + WRITE_PTR_OP(CS_ELA_BASE_ADDR + ELA_TWBSEL(4), &ela_state.regs[CS_ELA_TWBSEL4]), + 
WRITE_IMM_OP(CS_ELA_BASE_ADDR + ELA_EXTMASK(4), 0x00000000), + WRITE_IMM_OP(CS_ELA_BASE_ADDR + ELA_EXTCOMP(4), 0x00000000), + WRITE_IMM_OP(CS_ELA_BASE_ADDR + ELA_QUALMASK(4), 0x00000000), + WRITE_IMM_OP(CS_ELA_BASE_ADDR + ELA_QUALCOMP(4), 0x00000000), + + WRITE_PTR_OP(CS_ELA_BASE_ADDR + ELA_SIGMASK(4, 0), &ela_state.regs[CS_ELA_SIGMASK4_0]), + WRITE_PTR_OP(CS_ELA_BASE_ADDR + ELA_SIGMASK(4, 1), &ela_state.regs[CS_ELA_SIGMASK4_1]), + WRITE_PTR_OP(CS_ELA_BASE_ADDR + ELA_SIGMASK(4, 2), &ela_state.regs[CS_ELA_SIGMASK4_2]), + WRITE_PTR_OP(CS_ELA_BASE_ADDR + ELA_SIGMASK(4, 3), &ela_state.regs[CS_ELA_SIGMASK4_3]), + WRITE_PTR_OP(CS_ELA_BASE_ADDR + ELA_SIGMASK(4, 4), &ela_state.regs[CS_ELA_SIGMASK4_4]), + WRITE_PTR_OP(CS_ELA_BASE_ADDR + ELA_SIGMASK(4, 5), &ela_state.regs[CS_ELA_SIGMASK4_5]), + WRITE_PTR_OP(CS_ELA_BASE_ADDR + ELA_SIGMASK(4, 6), &ela_state.regs[CS_ELA_SIGMASK4_6]), + WRITE_PTR_OP(CS_ELA_BASE_ADDR + ELA_SIGMASK(4, 7), &ela_state.regs[CS_ELA_SIGMASK4_7]), + + WRITE_PTR_OP(CS_ELA_BASE_ADDR + ELA_SIGCOMP(4, 0), &ela_state.regs[CS_ELA_SIGCOMP4_0]), + WRITE_PTR_OP(CS_ELA_BASE_ADDR + ELA_SIGCOMP(4, 1), &ela_state.regs[CS_ELA_SIGCOMP4_1]), + WRITE_PTR_OP(CS_ELA_BASE_ADDR + ELA_SIGCOMP(4, 2), &ela_state.regs[CS_ELA_SIGCOMP4_2]), + WRITE_PTR_OP(CS_ELA_BASE_ADDR + ELA_SIGCOMP(4, 3), &ela_state.regs[CS_ELA_SIGCOMP4_3]), + WRITE_PTR_OP(CS_ELA_BASE_ADDR + ELA_SIGCOMP(4, 4), &ela_state.regs[CS_ELA_SIGCOMP4_4]), + WRITE_PTR_OP(CS_ELA_BASE_ADDR + ELA_SIGCOMP(4, 5), &ela_state.regs[CS_ELA_SIGCOMP4_5]), + WRITE_PTR_OP(CS_ELA_BASE_ADDR + ELA_SIGCOMP(4, 6), &ela_state.regs[CS_ELA_SIGCOMP4_6]), + WRITE_PTR_OP(CS_ELA_BASE_ADDR + ELA_SIGCOMP(4, 7), &ela_state.regs[CS_ELA_SIGCOMP4_7]), + + WRITE_IMM_OP(CS_GPU_COMMAND_ADDR, CS_GPU_COMMAND_TRACE_CONTROL_EN), + + WRITE_IMM_OP(CS_ELA_BASE_ADDR + ELA_CTRL, ELA_CTRL_RUN), + + BIT_OR_OP(&ela_state.enabled, 0x1), +}; + +static struct kbase_debug_coresight_csf_op ela_disable_ops[] = { + WRITE_IMM_OP(CS_ELA_BASE_ADDR + ELA_CTRL, 0x00000000), + 
/* Poll CTRL.TRACE_BUSY until it becomes low to ensure that trace has stopped. */ + POLL_OP(CS_ELA_BASE_ADDR + ELA_CTRL, ELA_CTRL_TRACE_BUSY, 0x0), + + BIT_AND_OP(&ela_state.enabled, 0x0), +}; + +static int parse_signal_groups(struct coresight_mali_source_drvdata *drvdata) +{ + struct device_node *signal_groups = NULL; + unsigned int siggrp_idx; + + if (drvdata->base.dev->of_node) + signal_groups = of_get_child_by_name(drvdata->base.dev->of_node, "signal-groups"); + + if (!signal_groups) { + dev_err(drvdata->base.dev, "Failed to find signal groups OF node"); + return -EINVAL; + } + + for (siggrp_idx = 0; siggrp_idx < CS_ELA_MAX_SIGNAL_GROUPS; siggrp_idx++) { + char buf[CS_SG_NAME_MAX_LEN]; + ssize_t res; + const char *name; + struct property *prop; + + res = snprintf(buf, CS_SG_NAME_MAX_LEN, "sg%d", siggrp_idx); + if (res <= 0) { + dev_err(drvdata->base.dev, + "Signal group name %d snprintf failed unexpectedly", siggrp_idx); + return -EINVAL; + } + + of_property_for_each_string(signal_groups, buf, prop, name) { + int sig_type; + + for (sig_type = 0; sig_type < CS_ELA_NR_SIGTYPE; sig_type++) { + if (!strncmp(signal_type_names[sig_type], name, + strlen(signal_type_names[sig_type]))) { + ela_state.signal_types[sig_type] = (1U << siggrp_idx); + ela_state.supported_tracemodes |= + (1U << signal_type_tracemode_map[sig_type]); + } + } + } + } + + /* Add TRACEMODE_NONE as supported to allow printing */ + ela_state.supported_tracemodes |= (1U << CS_ELA_TRACEMODE_NONE); + + return 0; +} + +int coresight_mali_sources_init_drvdata(struct coresight_mali_source_drvdata *drvdata) +{ + int res = 0; + +#if KERNEL_VERSION(5, 3, 0) <= LINUX_VERSION_CODE + drvdata->type_name = type_name; +#endif + + drvdata->base.kbase_client = kbase_debug_coresight_csf_register( + drvdata->base.gpu_dev, ela_range, NELEMS(ela_range)); + if (drvdata->base.kbase_client == NULL) { + dev_err(drvdata->base.dev, "Registration with full range failed unexpectedly"); + return -EINVAL; + } + + drvdata->trcid = 
CS_MALI_TRACE_ID; + + drvdata->base.enable_seq.ops = ela_enable_ops; + drvdata->base.enable_seq.nr_ops = NELEMS(ela_enable_ops); + + drvdata->base.disable_seq.ops = ela_disable_ops; + drvdata->base.disable_seq.nr_ops = NELEMS(ela_disable_ops); + + drvdata->base.config = kbase_debug_coresight_csf_config_create( + drvdata->base.kbase_client, &drvdata->base.enable_seq, &drvdata->base.disable_seq); + if (!drvdata->base.config) { + dev_err(drvdata->base.dev, "config create failed unexpectedly"); + return -EINVAL; + } + + res = parse_signal_groups(drvdata); + if (res) { + dev_err(drvdata->base.dev, "Failed to parse signal groups"); + return res; + } + + return 0; +} + +void coresight_mali_sources_deinit_drvdata(struct coresight_mali_source_drvdata *drvdata) +{ + if (drvdata->base.config != NULL) + kbase_debug_coresight_csf_config_free(drvdata->base.config); + + if (drvdata->base.kbase_client != NULL) + kbase_debug_coresight_csf_unregister(drvdata->base.kbase_client); +} + +static const struct of_device_id mali_source_ids[] = { { .compatible = + "arm,coresight-mali-source-ela" }, + {} }; + +static struct platform_driver mali_sources_platform_driver = { + .probe = coresight_mali_sources_probe, + .remove = coresight_mali_sources_remove, + .driver = { + .name = "coresight-mali-source-ela", + .owner = THIS_MODULE, + .of_match_table = mali_source_ids, + .suppress_bind_attrs = true, + }, +}; + +static int __init mali_sources_init(void) +{ + return platform_driver_register(&mali_sources_platform_driver); +} + +static void __exit mali_sources_exit(void) +{ + platform_driver_unregister(&mali_sources_platform_driver); +} + +module_init(mali_sources_init); +module_exit(mali_sources_exit); + +MODULE_AUTHOR("Arm Ltd."); +MODULE_DESCRIPTION("Arm Coresight Mali source ELA"); +MODULE_LICENSE("GPL"); diff --git a/drivers/hwtracing/coresight/mali/sources/etm/coresight_mali_source_etm_core.c b/drivers/hwtracing/coresight/mali/sources/etm/coresight_mali_source_etm_core.c new file mode 100644 
index 000000000000..ae9c2f7f2a8c --- /dev/null +++ b/drivers/hwtracing/coresight/mali/sources/etm/coresight_mali_source_etm_core.c @@ -0,0 +1,280 @@ +// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note +/* + * + * (C) COPYRIGHT 2022 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + */ + +#include +#include +#include +#include "sources/coresight_mali_sources.h" + +#define CS_ETM_BASE_ADDR 0xE0041000 +#define CS_MALI_TRACE_ID 0x00000010 + +#ifndef TRCVICTLR_SSSTATUS +#define TRCVICTLR_SSSTATUS BIT(9) +#endif + +#if KERNEL_VERSION(5, 3, 0) <= LINUX_VERSION_CODE +static char *type_name = "mali-source-etm"; +#endif + +#define NELEMS(s) (sizeof(s) / sizeof((s)[0])) + +enum cs_etm_dynamic_regs { + CS_ETM_TRCCONFIGR, + CS_ETM_TRCTRACEIDR, + CS_ETM_TRCVDARCCTLR, + CS_ETM_TRCSTALLCTLR, + CS_ETM_TRCVIIECTLR, + CS_ETM_NR_DYN_REGS +}; + +struct cs_etm_state { + int enabled; + u32 regs[CS_ETM_NR_DYN_REGS]; +}; + +static struct cs_etm_state etm_state = { 0 }; + +static struct kbase_debug_coresight_csf_address_range etm_range[] = { + { CS_ETM_BASE_ADDR, CS_ETM_BASE_ADDR + CORESIGHT_DEVTYPE }, +}; + +struct kbase_debug_coresight_csf_op etm_enable_ops[] = { + // Unlock ETM configuration + WRITE_IMM_OP(CS_ETM_BASE_ADDR + CORESIGHT_LAR, CS_MALI_UNLOCK_COMPONENT), + // Power up request + 
WRITE_IMM_OP(CS_ETM_BASE_ADDR + TRCPDCR, TRCPDCR_PU), + // Disable Tracing + WRITE_IMM_OP(CS_ETM_BASE_ADDR + TRCPRGCTLR, 0x00000000), + // Check the tracing unit is inactive before programming + POLL_OP(CS_ETM_BASE_ADDR + TRCSTATR, BIT(TRCSTATR_IDLE_BIT), BIT(TRCSTATR_IDLE_BIT)), + // Set trace configuration to enable global timestamping, and data value tracing + WRITE_PTR_OP(CS_ETM_BASE_ADDR + TRCCONFIGR, &etm_state.regs[CS_ETM_TRCCONFIGR]), + // Set event control 0 register + WRITE_IMM_OP(CS_ETM_BASE_ADDR + TRCEVENTCTL0R, 0x00000000), + // Set event control 1 register + WRITE_IMM_OP(CS_ETM_BASE_ADDR + TRCEVENTCTL1R, 0x00000000), + // Set trace ID + WRITE_PTR_OP(CS_ETM_BASE_ADDR + TRCTRACEIDR, &etm_state.regs[CS_ETM_TRCTRACEIDR]), + // Configure stall control register + WRITE_PTR_OP(CS_ETM_BASE_ADDR + TRCSTALLCTLR, &etm_state.regs[CS_ETM_TRCSTALLCTLR]), + // Synchronization period register - sync every 2^12 bytes + WRITE_IMM_OP(CS_ETM_BASE_ADDR + TRCSYNCPR, 0x0000000C), + // Set global timestamp control register to select resource 0 + WRITE_IMM_OP(CS_ETM_BASE_ADDR + TRCTSCTLR, 0x00000000), + // Set viewData include/exclude address range comparators to 0 + WRITE_PTR_OP(CS_ETM_BASE_ADDR + TRCVDARCCTLR, &etm_state.regs[CS_ETM_TRCVDARCCTLR]), + // Set viewData main control to select resource 0 + WRITE_IMM_OP(CS_ETM_BASE_ADDR + TRCVDCTLR, 0x00000001), + // Set viewData comparators to 0 + WRITE_IMM_OP(CS_ETM_BASE_ADDR + TRCVDSACCTLR, 0x00000000), + // Set stop/start logic to started state, select resource 1 + WRITE_IMM_OP(CS_ETM_BASE_ADDR + TRCVICTLR, TRCVICTLR_SSSTATUS | BIT(0)), + // Set viewInst start and stop control + WRITE_IMM_OP(CS_ETM_BASE_ADDR + TRCVISSCTLR, 0x00000000), + // Set viewInst include and exclude control to match all addresses in range + WRITE_PTR_OP(CS_ETM_BASE_ADDR + TRCVIIECTLR, &etm_state.regs[CS_ETM_TRCVIIECTLR]), + // enable trace + WRITE_IMM_OP(CS_ETM_BASE_ADDR + TRCPRGCTLR, 0x1), + // Wait until the unit is busy + POLL_OP(CS_ETM_BASE_ADDR + 
TRCSTATR, BIT(TRCSTATR_IDLE_BIT), 0), + // Lock the ETM configuration + WRITE_IMM_OP(CS_ETM_BASE_ADDR + CORESIGHT_LAR, 0x00000000), + // Set enabled bit on at the end of sequence + BIT_OR_OP(&etm_state.enabled, 0x1), +}; + +struct kbase_debug_coresight_csf_op etm_disable_ops[] = { + // Unlock ETM configuration + WRITE_IMM_OP(CS_ETM_BASE_ADDR + CORESIGHT_LAR, CS_MALI_UNLOCK_COMPONENT), + // Disable trace unit + WRITE_IMM_OP(CS_ETM_BASE_ADDR + TRCPRGCTLR, 0x00000000), + // Poll until idle + POLL_OP(CS_ETM_BASE_ADDR + TRCSTATR, BIT(TRCSTATR_IDLE_BIT), BIT(TRCSTATR_IDLE_BIT)), + // Lock ETM configuration + WRITE_IMM_OP(CS_ETM_BASE_ADDR + CORESIGHT_LAR, 0x00000000), + // Set enabled bit off at the end of sequence + BIT_AND_OP(&etm_state.enabled, 0x0), +}; + +static void set_default_regs(void) +{ + // Turn on instruction tracing + etm_state.regs[CS_ETM_TRCCONFIGR] = 0x00000800; + // Set ID + etm_state.regs[CS_ETM_TRCTRACEIDR] = CS_MALI_TRACE_ID; + // Set data comparators to none + etm_state.regs[CS_ETM_TRCVDARCCTLR] = 0x00000000; + // Set instructions address filter to none + etm_state.regs[CS_ETM_TRCVIIECTLR] = 0x00000000; + // Set stall configuration to a basic setting + etm_state.regs[CS_ETM_TRCSTALLCTLR] = 0x00000000; +} + +static const struct of_device_id mali_source_ids[] = { { .compatible = + "arm,coresight-mali-source-etm" }, + {} }; + +int coresight_mali_sources_init_drvdata(struct coresight_mali_source_drvdata *drvdata) +{ + int ret = 0; + + if (drvdata == NULL) + return -EINVAL; + +#if KERNEL_VERSION(5, 3, 0) <= LINUX_VERSION_CODE + drvdata->type_name = type_name; +#endif + etm_state.enabled = 0x0; + + drvdata->base.kbase_client = kbase_debug_coresight_csf_register( + drvdata->base.gpu_dev, etm_range, NELEMS(etm_range)); + if (drvdata->base.kbase_client == NULL) { + dev_err(drvdata->base.dev, "Registration with full range failed unexpectedly\n"); + return -EINVAL; + } + + set_default_regs(); + drvdata->trcid = CS_MALI_TRACE_ID; + + drvdata->base.enable_seq.ops 
= etm_enable_ops; + drvdata->base.enable_seq.nr_ops = NELEMS(etm_enable_ops); + + drvdata->base.disable_seq.ops = etm_disable_ops; + drvdata->base.disable_seq.nr_ops = NELEMS(etm_disable_ops); + + drvdata->base.config = kbase_debug_coresight_csf_config_create( + drvdata->base.kbase_client, &drvdata->base.enable_seq, &drvdata->base.disable_seq); + if (!drvdata->base.config) { + dev_err(drvdata->base.dev, "Config create failed unexpectedly\n"); + kbase_debug_coresight_csf_unregister(drvdata->base.kbase_client); + return -EINVAL; + } + + return ret; +} + +void coresight_mali_sources_deinit_drvdata(struct coresight_mali_source_drvdata *drvdata) +{ + if (drvdata->base.config != NULL) + kbase_debug_coresight_csf_config_free(drvdata->base.config); + + if (drvdata->base.kbase_client != NULL) + kbase_debug_coresight_csf_unregister(drvdata->base.kbase_client); +} + +static int verify_store_reg(struct device *dev, const char *buf, size_t count, int reg) +{ + struct coresight_mali_source_drvdata *drvdata = dev_get_drvdata(dev->parent); + u32 val; + int err; + + if (buf == NULL) + return -EINVAL; + + if (etm_state.enabled == 1) { + dev_err(drvdata->base.dev, + "Config needs to be disabled before modifying registers\n"); + return -EINVAL; + } + + err = kstrtou32(buf, 0, &val); + if (err) { + dev_err(drvdata->base.dev, "Invalid input value\n"); + return -EINVAL; + } + + etm_state.regs[reg] = val; + return count; +} + +#define CS_ETM_REG_ATTR_RW(_a, _b) \ + static ssize_t _a##_show(struct device *dev, struct device_attribute *attr, \ + char *const buf) \ + { \ + return sprintf(buf, "%#x\n", etm_state.regs[CS_ETM_##_b]); \ + } \ + static ssize_t _a##_store(struct device *dev, struct device_attribute *attr, \ + const char *buf, size_t count) \ + { \ + return verify_store_reg(dev, buf, count, CS_ETM_##_b); \ + } \ + static DEVICE_ATTR_RW(_a) + +CS_ETM_REG_ATTR_RW(trcconfigr, TRCCONFIGR); +CS_ETM_REG_ATTR_RW(trctraceidr, TRCTRACEIDR); +CS_ETM_REG_ATTR_RW(trcvdarcctlr, TRCVDARCCTLR); 
+CS_ETM_REG_ATTR_RW(trcviiectlr, TRCVIIECTLR); +CS_ETM_REG_ATTR_RW(trcstallctlr, TRCSTALLCTLR); + +static ssize_t is_enabled_show(struct device *dev, struct device_attribute *attr, char *const buf) +{ + return sprintf(buf, "%d\n", etm_state.enabled); +} +static DEVICE_ATTR_RO(is_enabled); + +static struct attribute *coresight_etm_attrs[] = { + &dev_attr_is_enabled.attr, + &dev_attr_trcconfigr.attr, + &dev_attr_trctraceidr.attr, + &dev_attr_trcvdarcctlr.attr, + &dev_attr_trcviiectlr.attr, + &dev_attr_trcstallctlr.attr, + NULL, +}; +static struct attribute_group coresight_etm_group = { .attrs = coresight_etm_attrs, + .name = "mgmt" }; +static const struct attribute_group *coresight_etm_groups[] = { + &coresight_etm_group, + NULL, +}; +const struct attribute_group **coresight_mali_source_groups_get(void) +{ + return coresight_etm_groups; +} + +static struct platform_driver mali_sources_platform_driver = { + .probe = coresight_mali_sources_probe, + .remove = coresight_mali_sources_remove, + .driver = { + .name = "coresight-mali-source-etm", + .owner = THIS_MODULE, + .of_match_table = mali_source_ids, + .suppress_bind_attrs = true, + }, +}; + +static int __init mali_sources_init(void) +{ + return platform_driver_register(&mali_sources_platform_driver); +} + +static void __exit mali_sources_exit(void) +{ + platform_driver_unregister(&mali_sources_platform_driver); +} + +module_init(mali_sources_init); +module_exit(mali_sources_exit); + +MODULE_AUTHOR("ARM Ltd."); +MODULE_DESCRIPTION("Arm Coresight Mali source ETM"); +MODULE_LICENSE("GPL"); diff --git a/drivers/hwtracing/coresight/mali/sources/itm/coresight_mali_source_itm_core.c b/drivers/hwtracing/coresight/mali/sources/itm/coresight_mali_source_itm_core.c new file mode 100644 index 000000000000..9f60192a682b --- /dev/null +++ b/drivers/hwtracing/coresight/mali/sources/itm/coresight_mali_source_itm_core.c @@ -0,0 +1,265 @@ +// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note +/* + * + * (C) COPYRIGHT 2022 ARM 
Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + */ + +#include +#include +#include "sources/coresight_mali_sources.h" + +/* Linux Coresight framework does not support multiple sources enabled + * at the same time. + * + * To avoid Kernel instability, all Mali Coresight sources use the + * same trace ID value as the mandatory ETM one. 
+ */ +#define CS_MALI_TRACE_ID 0x00000010 + +#define CS_SCS_BASE_ADDR 0xE000E000 +#define SCS_DEMCR 0xDFC +#define CS_ITM_BASE_ADDR 0xE0000000 +#define ITM_TCR 0xE80 +#define ITM_TCR_BUSY_BIT (0x1 << 22) +#define CS_DWT_BASE_ADDR 0xE0001000 +#define DWT_CTRL 0x000 +#define DWT_CYCCNT 0x004 + +#if KERNEL_VERSION(5, 3, 0) <= LINUX_VERSION_CODE +static char *type_name = "mali-source-itm"; +#endif + +#define NELEMS(s) (sizeof(s) / sizeof((s)[0])) + +enum cs_itm_dwt_dynamic_regs { CS_DWT_CTRL, CS_ITM_TCR, CS_ITM_DWT_NR_DYN_REGS }; + +struct cs_itm_state { + int enabled; + u32 regs[CS_ITM_DWT_NR_DYN_REGS]; +}; + +static struct cs_itm_state itm_state = { 0 }; + +static struct kbase_debug_coresight_csf_address_range dwt_itm_range[] = { + { CS_SCS_BASE_ADDR, CS_SCS_BASE_ADDR + CORESIGHT_DEVTYPE }, + { CS_ITM_BASE_ADDR, CS_ITM_BASE_ADDR + CORESIGHT_DEVTYPE }, + { CS_DWT_BASE_ADDR, CS_DWT_BASE_ADDR + CORESIGHT_DEVTYPE } +}; + +static struct kbase_debug_coresight_csf_op dwt_itm_enable_ops[] = { + // enable ITM/DWT functionality via DEMCR register + WRITE_IMM_OP(CS_SCS_BASE_ADDR + SCS_DEMCR, 0x01000000), + // Unlock DWT configuration + WRITE_IMM_OP(CS_DWT_BASE_ADDR + CORESIGHT_LAR, CS_MALI_UNLOCK_COMPONENT), + // prep DWT counter to immediately send sync packet ((1 << 24) - 1) + WRITE_IMM_OP(CS_DWT_BASE_ADDR + DWT_CYCCNT, 0x00ffffff), + // Write initial value of post count counter + WRITE_IMM_OP(CS_DWT_BASE_ADDR + DWT_CTRL, 0x00000020), + // Set DWT configuration: + WRITE_PTR_OP(CS_DWT_BASE_ADDR + DWT_CTRL, &itm_state.regs[CS_DWT_CTRL]), + // Lock DWT Configuration + WRITE_IMM_OP(CS_DWT_BASE_ADDR + CORESIGHT_LAR, 0x00000000), + // Unlock ITM configuration + WRITE_IMM_OP(CS_ITM_BASE_ADDR + CORESIGHT_LAR, CS_MALI_UNLOCK_COMPONENT), + // Set ITM configuration: + WRITE_PTR_OP(CS_ITM_BASE_ADDR + ITM_TCR, &itm_state.regs[CS_ITM_TCR]), + // Lock ITM configuration + WRITE_IMM_OP(CS_ITM_BASE_ADDR + CORESIGHT_LAR, 0x00000000), + // Set enabled bit on at the end of sequence + 
BIT_OR_OP(&itm_state.enabled, 0x1), +}; + +static struct kbase_debug_coresight_csf_op dwt_itm_disable_ops[] = { + // Disable ITM/DWT functionality via DEMCR register + WRITE_IMM_OP(CS_SCS_BASE_ADDR + SCS_DEMCR, 0x00000000), + // Unlock ITM configuration + WRITE_IMM_OP(CS_ITM_BASE_ADDR + CORESIGHT_LAR, CS_MALI_UNLOCK_COMPONENT), + // Check ITM is disabled + POLL_OP(CS_ITM_BASE_ADDR + ITM_TCR, ITM_TCR_BUSY_BIT, 0x0), + // Lock + WRITE_IMM_OP(CS_ITM_BASE_ADDR + CORESIGHT_LAR, 0x00000000), + // Set enabled bit off at the end of sequence + BIT_AND_OP(&itm_state.enabled, 0x0), +}; + +static void set_default_regs(void) +{ + // DWT configuration: + // [0] = 1, enable cycle counter + // [4:1] = 4, set PC sample rate of 256 cycles + // [8:5] = 1, set initial post count value + // [9] = 1, select position of post count tap on the cycle counter + // [11:10] = 1, enable sync packets + // [12] = 1, enable periodic PC sample packets + itm_state.regs[CS_DWT_CTRL] = 0x00001629; + // ITM configuration: + // [0] = 1, Enable ITM + // [1] = 1, Enable Time stamp generation + // [2] = 1, Enable sync packet transmission + // [3] = 1, Enable HW event forwarding + // [11:10] = 1, Generate TS request approx every 128 cycles + // [22:16] = 1, Trace bus ID + itm_state.regs[CS_ITM_TCR] = 0x0001040F; +} + +static int verify_store_reg(struct device *dev, const char *buf, size_t count, int reg) +{ + struct coresight_mali_source_drvdata *drvdata = dev_get_drvdata(dev->parent); + u32 val; + int err; + + if (buf == NULL) + return -EINVAL; + + if (itm_state.enabled == 1) { + dev_err(drvdata->base.dev, + "Config needs to be disabled before modifying registers\n"); + return -EINVAL; + } + + err = kstrtou32(buf, 0, &val); + if (err) { + dev_err(drvdata->base.dev, "Invalid input value\n"); + return -EINVAL; + } + + itm_state.regs[reg] = val; + return count; +} + +static ssize_t is_enabled_show(struct device *dev, struct device_attribute *attr, char *const buf) +{ + return sprintf(buf, "%d\n", 
itm_state.enabled); +} +static DEVICE_ATTR_RO(is_enabled); + +#define CS_ITM_DWT_REG_ATTR_RW(_a, _b) \ + static ssize_t _a##_show(struct device *dev, struct device_attribute *attr, \ + char *const buf) \ + { \ + return sprintf(buf, "%#x\n", itm_state.regs[CS_##_b]); \ + } \ + static ssize_t _a##_store(struct device *dev, struct device_attribute *attr, \ + const char *buf, size_t count) \ + { \ + return verify_store_reg(dev, buf, count, CS_##_b); \ + } \ + static DEVICE_ATTR_RW(_a) + +CS_ITM_DWT_REG_ATTR_RW(dwt_ctrl, DWT_CTRL); +CS_ITM_DWT_REG_ATTR_RW(itm_tcr, ITM_TCR); + +static struct attribute *coresight_mali_source_attrs[] = { + &dev_attr_is_enabled.attr, + &dev_attr_dwt_ctrl.attr, + &dev_attr_itm_tcr.attr, + NULL, +}; + +static const struct attribute_group coresight_mali_source_group = { + .attrs = coresight_mali_source_attrs, + .name = "mgmt" +}; + +static const struct attribute_group *coresight_mali_source_groups[] = { + &coresight_mali_source_group, + NULL, +}; + +const struct attribute_group **coresight_mali_source_groups_get(void) +{ + return coresight_mali_source_groups; +} + +int coresight_mali_sources_init_drvdata(struct coresight_mali_source_drvdata *drvdata) +{ + if (drvdata == NULL) + return -EINVAL; + +#if KERNEL_VERSION(5, 3, 0) <= LINUX_VERSION_CODE + drvdata->type_name = type_name; +#endif + + drvdata->base.kbase_client = kbase_debug_coresight_csf_register( + drvdata->base.gpu_dev, dwt_itm_range, NELEMS(dwt_itm_range)); + if (drvdata->base.kbase_client == NULL) { + dev_err(drvdata->base.dev, "Registration with full range failed unexpectedly\n"); + return -EINVAL; + } + + drvdata->trcid = CS_MALI_TRACE_ID; + + drvdata->base.enable_seq.ops = dwt_itm_enable_ops; + drvdata->base.enable_seq.nr_ops = NELEMS(dwt_itm_enable_ops); + + drvdata->base.disable_seq.ops = dwt_itm_disable_ops; + drvdata->base.disable_seq.nr_ops = NELEMS(dwt_itm_disable_ops); + + set_default_regs(); + + drvdata->base.config = kbase_debug_coresight_csf_config_create( + 
drvdata->base.kbase_client, &drvdata->base.enable_seq, &drvdata->base.disable_seq); + if (!drvdata->base.config) { + dev_err(drvdata->base.dev, "config create failed unexpectedly\n"); + kbase_debug_coresight_csf_unregister(drvdata->base.kbase_client); + return -EINVAL; + } + + return 0; +} + +void coresight_mali_sources_deinit_drvdata(struct coresight_mali_source_drvdata *drvdata) +{ + if (drvdata->base.config != NULL) + kbase_debug_coresight_csf_config_free(drvdata->base.config); + + if (drvdata->base.kbase_client != NULL) + kbase_debug_coresight_csf_unregister(drvdata->base.kbase_client); +} + +static const struct of_device_id mali_source_ids[] = { { .compatible = + "arm,coresight-mali-source-itm" }, + {} }; + +static struct platform_driver mali_sources_platform_driver = { + .probe = coresight_mali_sources_probe, + .remove = coresight_mali_sources_remove, + .driver = { + .name = "coresight-mali-source-itm", + .owner = THIS_MODULE, + .of_match_table = mali_source_ids, + .suppress_bind_attrs = true, + }, +}; + +static int __init mali_sources_init(void) +{ + return platform_driver_register(&mali_sources_platform_driver); +} + +static void __exit mali_sources_exit(void) +{ + platform_driver_unregister(&mali_sources_platform_driver); +} + +module_init(mali_sources_init); +module_exit(mali_sources_exit); + +MODULE_AUTHOR("ARM Ltd."); +MODULE_DESCRIPTION("Arm Coresight Mali source ITM"); +MODULE_LICENSE("GPL"); diff --git a/include/linux/mali_kbase_debug_coresight_csf.h b/include/linux/mali_kbase_debug_coresight_csf.h new file mode 100644 index 000000000000..8356fd497e74 --- /dev/null +++ b/include/linux/mali_kbase_debug_coresight_csf.h @@ -0,0 +1,241 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * + * (C) COPYRIGHT 2022 ARM Limited. All rights reserved. 
+ * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + */ + +#ifndef _KBASE_DEBUG_CORESIGHT_CSF_ +#define _KBASE_DEBUG_CORESIGHT_CSF_ + +#include +#include + +#define KBASE_DEBUG_CORESIGHT_CSF_OP_TYPE_NOP 0U +#define KBASE_DEBUG_CORESIGHT_CSF_OP_TYPE_WRITE_IMM 1U +#define KBASE_DEBUG_CORESIGHT_CSF_OP_TYPE_WRITE_IMM_RANGE 2U +#define KBASE_DEBUG_CORESIGHT_CSF_OP_TYPE_WRITE 3U +#define KBASE_DEBUG_CORESIGHT_CSF_OP_TYPE_READ 4U +#define KBASE_DEBUG_CORESIGHT_CSF_OP_TYPE_POLL 5U +#define KBASE_DEBUG_CORESIGHT_CSF_OP_TYPE_BIT_OR 6U +#define KBASE_DEBUG_CORESIGHT_CSF_OP_TYPE_BIT_XOR 7U +#define KBASE_DEBUG_CORESIGHT_CSF_OP_TYPE_BIT_AND 8U +#define KBASE_DEBUG_CORESIGHT_CSF_OP_TYPE_BIT_NOT 9U + +/** + * struct kbase_debug_coresight_csf_write_imm_op - Coresight immediate write operation structure + * + * @reg_addr: Register address to write to. + * @val: Value to write at @reg_addr. + */ +struct kbase_debug_coresight_csf_write_imm_op { + __u32 reg_addr; + __u32 val; +}; + +/** + * struct kbase_debug_coresight_csf_write_imm_range_op - Coresight immediate write range + * operation structure + * + * @reg_start: Register address to start writing from. + * @reg_end: Register address to stop writing at. End address included in the write range. + * @val: Value to write to the registers from @reg_start to @reg_end. 
+ */ +struct kbase_debug_coresight_csf_write_imm_range_op { + __u32 reg_start; + __u32 reg_end; + __u32 val; +}; + +/** + * struct kbase_debug_coresight_csf_write_op - Coresight write operation structure + * + * @reg_addr: Register address to write to. + * @ptr: Pointer to the value to write at @reg_addr. + */ +struct kbase_debug_coresight_csf_write_op { + __u32 reg_addr; + __u32 *ptr; +}; + +/** + * struct kbase_debug_coresight_csf_read_op - Coresight read operation structure + * + * @reg_addr: Register address to read. + * @ptr: Pointer where to store the read value. + */ +struct kbase_debug_coresight_csf_read_op { + __u32 reg_addr; + __u32 *ptr; +}; + +/** + * struct kbase_debug_coresight_csf_poll_op - Coresight poll operation structure + * + * @reg_addr: Register address to poll. + * @val: Expected value after poll. + * @mask: Mask to apply on the read value from @reg_addr when comparing against @val. + */ +struct kbase_debug_coresight_csf_poll_op { + __u32 reg_addr; + __u32 val; + __u32 mask; +}; + +/** + * struct kbase_debug_coresight_csf_bitw_op - Coresight bitwise operation structure + * + * @ptr: Pointer to the variable on which to execute the bit operation. + * @val: Value with which the operation should be executed against @ptr value. + */ +struct kbase_debug_coresight_csf_bitw_op { + __u32 *ptr; + __u32 val; +}; + +/** + * struct kbase_debug_coresight_csf_op - Coresight supported operations + * + * @type: Operation type. + * @padding: Padding for 64bit alignment. + * @op: Operation union. + * @op.write_imm: Parameters for immediate write operation. + * @op.write_imm_range: Parameters for immediate range write operation. + * @op.write: Parameters for write operation. + * @op.read: Parameters for read operation. + * @op.poll: Parameters for poll operation. + * @op.bitw: Parameters for bitwise operation. + * @op.padding: Padding for 64bit alignment. + * + * All operation structures should include padding to ensure they are the same size. 
+ */ +struct kbase_debug_coresight_csf_op { + __u8 type; + __u8 padding[7]; + union { + struct kbase_debug_coresight_csf_write_imm_op write_imm; + struct kbase_debug_coresight_csf_write_imm_range_op write_imm_range; + struct kbase_debug_coresight_csf_write_op write; + struct kbase_debug_coresight_csf_read_op read; + struct kbase_debug_coresight_csf_poll_op poll; + struct kbase_debug_coresight_csf_bitw_op bitw; + u32 padding[3]; + } op; +}; + +/** + * struct kbase_debug_coresight_csf_sequence - Coresight sequence of operations + * + * @ops: Arrays containing Coresight operations. + * @nr_ops: Size of @ops. + */ +struct kbase_debug_coresight_csf_sequence { + struct kbase_debug_coresight_csf_op *ops; + int nr_ops; +}; + +/** + * struct kbase_debug_coresight_csf_address_range - Coresight client address range + * + * @start: Start offset of the address range. + * @end: End offset of the address range. + */ +struct kbase_debug_coresight_csf_address_range { + __u32 start; + __u32 end; +}; + +/** + * kbase_debug_coresight_csf_register - Register as a client for set ranges of MCU memory. + * + * @drv_data: Pointer to driver device data. + * @ranges: Pointer to an array of struct kbase_debug_coresight_csf_address_range + * that contains start and end addresses that the client will manage. + * @nr_ranges: Size of @ranges array. + * + * This function checks @ranges against current client claimed ranges. If there + * are no overlaps, a new client is created and added to the list. + * + * Return: A pointer of the registered client instance on success. NULL on failure. + */ +void *kbase_debug_coresight_csf_register(void *drv_data, + struct kbase_debug_coresight_csf_address_range *ranges, + int nr_ranges); + +/** + * kbase_debug_coresight_csf_unregister - Removes a coresight client. + * + * @client_data: A pointer to a coresight client. + * + * This function removes a client from the client list and frees the client struct. 
+ */ +void kbase_debug_coresight_csf_unregister(void *client_data); + +/** + * kbase_debug_coresight_csf_config_create - Creates a configuration containing + * enable and disable sequence. + * + * @client_data: Pointer to a coresight client. + * @enable_seq: Pointer to a struct containing the ops needed to enable coresight blocks. + * It's optional so could be NULL. + * @disable_seq: Pointer to a struct containing ops to run to disable coresight blocks. + * It's optional so could be NULL. + * + * Return: Valid pointer on success. NULL on failure. + */ +void * +kbase_debug_coresight_csf_config_create(void *client_data, + struct kbase_debug_coresight_csf_sequence *enable_seq, + struct kbase_debug_coresight_csf_sequence *disable_seq); +/** + * kbase_debug_coresight_csf_config_free - Frees a configuration containing + * enable and disable sequence. + * + * @config_data: Pointer to a coresight configuration. + */ +void kbase_debug_coresight_csf_config_free(void *config_data); + +/** + * kbase_debug_coresight_csf_config_enable - Enables a coresight configuration + * + * @config_data: Pointer to coresight configuration. + * + * If GPU is turned on, the configuration is immediately applied to the CoreSight blocks. + * If the GPU is turned off, the configuration is scheduled to be applied the next + * time the GPU is turned on. + * + * A configuration is enabled by executing read/write/poll ops defined in config->enable_seq. + * + * Return: 0 if success. Error code on failure. + */ +int kbase_debug_coresight_csf_config_enable(void *config_data); +/** + * kbase_debug_coresight_csf_config_disable - Disables a coresight configuration + * + * @config_data: Pointer to coresight configuration. + * + * If the GPU is turned off, this is effectively a NOP as kbase should have disabled + * the configuration when GPU is off. + * If the GPU is on, the configuration will be disabled. + * + * A configuration is disabled by executing read/write/poll ops defined in config->disable_seq.
+ * + * Return: 0 if success. Error code on failure. + */ +int kbase_debug_coresight_csf_config_disable(void *config_data); + +#endif /* _KBASE_DEBUG_CORESIGHT_CSF_ */ diff --git a/include/linux/version_compat_defs.h b/include/linux/version_compat_defs.h index d0a09985c5ca..335147cada2c 100644 --- a/include/linux/version_compat_defs.h +++ b/include/linux/version_compat_defs.h @@ -24,10 +24,12 @@ #include -#if KERNEL_VERSION(4, 16, 0) >= LINUX_VERSION_CODE +#if KERNEL_VERSION(4, 16, 0) > LINUX_VERSION_CODE typedef unsigned int __poll_t; #endif +#if KERNEL_VERSION(4, 9, 78) >= LINUX_VERSION_CODE + #ifndef EPOLLHUP #define EPOLLHUP POLLHUP #endif @@ -44,4 +46,6 @@ typedef unsigned int __poll_t; #define EPOLLRDNORM POLLRDNORM #endif +#endif + #endif /* _VERSION_COMPAT_DEFS_H_ */ diff --git a/include/uapi/gpu/arm/bifrost/backend/gpu/mali_kbase_model_dummy.h b/include/uapi/gpu/arm/bifrost/backend/gpu/mali_kbase_model_dummy.h index 613eb1fdd081..a44da7beb041 100644 --- a/include/uapi/gpu/arm/bifrost/backend/gpu/mali_kbase_model_dummy.h +++ b/include/uapi/gpu/arm/bifrost/backend/gpu/mali_kbase_model_dummy.h @@ -29,7 +29,11 @@ #include #define KBASE_DUMMY_MODEL_COUNTER_HEADER_DWORDS (4) +#if MALI_USE_CSF +#define KBASE_DUMMY_MODEL_COUNTER_PER_CORE (65) +#else /* MALI_USE_CSF */ #define KBASE_DUMMY_MODEL_COUNTER_PER_CORE (60) +#endif /* !MALI_USE_CSF */ #define KBASE_DUMMY_MODEL_COUNTERS_PER_BIT (4) #define KBASE_DUMMY_MODEL_COUNTER_ENABLED(enable_mask, ctr_idx) \ (enable_mask & (1 << (ctr_idx / KBASE_DUMMY_MODEL_COUNTERS_PER_BIT))) @@ -56,7 +60,16 @@ #define KBASE_DUMMY_MODEL_MAX_SAMPLE_SIZE \ (KBASE_DUMMY_MODEL_MAX_NUM_PERF_BLOCKS * KBASE_DUMMY_MODEL_BLOCK_SIZE) +/* + * Bit mask - no. bits set is no. 
cores + * Values obtained from talking to HW team + * Example: tODx has 10 cores, 0b11 1111 1111 -> 0x3FF + */ #define DUMMY_IMPLEMENTATION_SHADER_PRESENT (0xFull) +#define DUMMY_IMPLEMENTATION_SHADER_PRESENT_TBEX (0x7FFFull) +#define DUMMY_IMPLEMENTATION_SHADER_PRESENT_TODX (0x3FFull) +#define DUMMY_IMPLEMENTATION_SHADER_PRESENT_TTUX (0x7FFull) +#define DUMMY_IMPLEMENTATION_SHADER_PRESENT_TTIX (0xFFFull) #define DUMMY_IMPLEMENTATION_TILER_PRESENT (0x1ull) #define DUMMY_IMPLEMENTATION_L2_PRESENT (0x1ull) #define DUMMY_IMPLEMENTATION_STACK_PRESENT (0xFull) diff --git a/include/uapi/gpu/arm/bifrost/backend/gpu/mali_kbase_model_linux.h b/include/uapi/gpu/arm/bifrost/backend/gpu/mali_kbase_model_linux.h new file mode 100644 index 000000000000..69bc44c26361 --- /dev/null +++ b/include/uapi/gpu/arm/bifrost/backend/gpu/mali_kbase_model_linux.h @@ -0,0 +1,38 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * + * (C) COPYRIGHT 2022 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. 
+ * + */ + +/* + * Dummy Model interface + */ + +#ifndef _UAPI_KBASE_MODEL_LINUX_H_ +#define _UAPI_KBASE_MODEL_LINUX_H_ + +/* Generic model IRQs */ +enum model_linux_irqs { + MODEL_LINUX_JOB_IRQ, + MODEL_LINUX_GPU_IRQ, + MODEL_LINUX_MMU_IRQ, + MODEL_LINUX_NONE_IRQ, + MODEL_LINUX_NUM_TYPE_IRQ +}; + +#endif /* _UAPI_KBASE_MODEL_LINUX_H_ */ diff --git a/include/uapi/gpu/arm/bifrost/csf/mali_base_csf_kernel.h b/include/uapi/gpu/arm/bifrost/csf/mali_base_csf_kernel.h index d9813c055809..ec8c02f18e16 100644 --- a/include/uapi/gpu/arm/bifrost/csf/mali_base_csf_kernel.h +++ b/include/uapi/gpu/arm/bifrost/csf/mali_base_csf_kernel.h @@ -118,9 +118,21 @@ #define BASE_QUEUE_MAX_PRIORITY (15U) -/* CQS Sync object is an array of __u32 event_mem[2], error field index is 1 */ -#define BASEP_EVENT_VAL_INDEX (0U) -#define BASEP_EVENT_ERR_INDEX (1U) +/* Sync32 object fields definition */ +#define BASEP_EVENT32_VAL_OFFSET (0U) +#define BASEP_EVENT32_ERR_OFFSET (4U) +#define BASEP_EVENT32_SIZE_BYTES (8U) + +/* Sync64 object fields definition */ +#define BASEP_EVENT64_VAL_OFFSET (0U) +#define BASEP_EVENT64_ERR_OFFSET (8U) +#define BASEP_EVENT64_SIZE_BYTES (16U) + +/* Sync32 object alignment, equal to its size */ +#define BASEP_EVENT32_ALIGN_BYTES (8U) + +/* Sync64 object alignment, equal to its size */ +#define BASEP_EVENT64_ALIGN_BYTES (16U) /* The upper limit for number of objects that could be waited/set per command. 
* This limit is now enforced as internally the error inherit inputs are diff --git a/include/uapi/gpu/arm/bifrost/csf/mali_kbase_csf_ioctl.h b/include/uapi/gpu/arm/bifrost/csf/mali_kbase_csf_ioctl.h index d9a1867e13c3..642ca3465ead 100644 --- a/include/uapi/gpu/arm/bifrost/csf/mali_kbase_csf_ioctl.h +++ b/include/uapi/gpu/arm/bifrost/csf/mali_kbase_csf_ioctl.h @@ -72,10 +72,18 @@ * - base_jit_alloc_info_11_5 * - kbase_ioctl_mem_jit_init_10_2 * - kbase_ioctl_mem_jit_init_11_5 + * 1.17: + * - Fix kinstr_prfcnt issues: + * - Missing implicit sample for CMD_STOP when HWCNT buffer is full. + * - Race condition when stopping periodic sampling. + * - prfcnt_block_metadata::block_idx gaps. + * - PRFCNT_CONTROL_CMD_SAMPLE_ASYNC is removed. + * 1.18: + * - CPU mappings of USER_BUFFER imported memory handles must be cached. */ #define BASE_UK_VERSION_MAJOR 1 -#define BASE_UK_VERSION_MINOR 16 +#define BASE_UK_VERSION_MINOR 17 /** * struct kbase_ioctl_version_check - Check version compatibility between diff --git a/include/uapi/gpu/arm/bifrost/gpu/mali_kbase_gpu_id.h b/include/uapi/gpu/arm/bifrost/gpu/mali_kbase_gpu_id.h index 1f34d99830fe..784e09a7edc2 100644 --- a/include/uapi/gpu/arm/bifrost/gpu/mali_kbase_gpu_id.h +++ b/include/uapi/gpu/arm/bifrost/gpu/mali_kbase_gpu_id.h @@ -119,7 +119,6 @@ #define GPU_ID2_PRODUCT_TBEX GPU_ID2_MODEL_MAKE(9, 2) #define GPU_ID2_PRODUCT_LBEX GPU_ID2_MODEL_MAKE(9, 4) #define GPU_ID2_PRODUCT_TBAX GPU_ID2_MODEL_MAKE(9, 5) -#define GPU_ID2_PRODUCT_TDUX GPU_ID2_MODEL_MAKE(10, 1) #define GPU_ID2_PRODUCT_TODX GPU_ID2_MODEL_MAKE(10, 2) #define GPU_ID2_PRODUCT_TGRX GPU_ID2_MODEL_MAKE(10, 3) #define GPU_ID2_PRODUCT_TVAX GPU_ID2_MODEL_MAKE(10, 4) diff --git a/include/uapi/gpu/arm/bifrost/jm/mali_kbase_jm_ioctl.h b/include/uapi/gpu/arm/bifrost/jm/mali_kbase_jm_ioctl.h index 9c7553ff2bd2..902d0ce9145a 100644 --- a/include/uapi/gpu/arm/bifrost/jm/mali_kbase_jm_ioctl.h +++ b/include/uapi/gpu/arm/bifrost/jm/mali_kbase_jm_ioctl.h @@ -133,9 +133,17 @@ * - 
base_jit_alloc_info_11_5 * - kbase_ioctl_mem_jit_init_10_2 * - kbase_ioctl_mem_jit_init_11_5 + * 11.37: + * - Fix kinstr_prfcnt issues: + * - Missing implicit sample for CMD_STOP when HWCNT buffer is full. + * - Race condition when stopping periodic sampling. + * - prfcnt_block_metadata::block_idx gaps. + * - PRFCNT_CONTROL_CMD_SAMPLE_ASYNC is removed. + * 11.38: + * - CPU mappings of USER_BUFFER imported memory handles must be cached. */ #define BASE_UK_VERSION_MAJOR 11 -#define BASE_UK_VERSION_MINOR 36 +#define BASE_UK_VERSION_MINOR 37 /** * struct kbase_ioctl_version_check - Check version compatibility between diff --git a/include/uapi/gpu/arm/bifrost/mali_kbase_hwcnt_reader.h b/include/uapi/gpu/arm/bifrost/mali_kbase_hwcnt_reader.h index 962decc10efc..5089bf249528 100644 --- a/include/uapi/gpu/arm/bifrost/mali_kbase_hwcnt_reader.h +++ b/include/uapi/gpu/arm/bifrost/mali_kbase_hwcnt_reader.h @@ -445,7 +445,7 @@ struct prfcnt_metadata { * @PRFCNT_CONTROL_CMD_STOP: Stop the counter data dump run for the * calling client session. * @PRFCNT_CONTROL_CMD_SAMPLE_SYNC: Trigger a synchronous manual sample. - * @PRFCNT_CONTROL_CMD_SAMPLE_ASYNC: Trigger an asynchronous manual sample. + * @PRFCNT_CONTROL_CMD_RESERVED: Previously SAMPLE_ASYNC; no longer supported. * @PRFCNT_CONTROL_CMD_DISCARD: Discard all samples which have not yet * been consumed by userspace.
Note that * this can race with new samples if @@ -455,7 +455,7 @@ enum prfcnt_control_cmd_code { PRFCNT_CONTROL_CMD_START = 1, PRFCNT_CONTROL_CMD_STOP, PRFCNT_CONTROL_CMD_SAMPLE_SYNC, - PRFCNT_CONTROL_CMD_SAMPLE_ASYNC, + PRFCNT_CONTROL_CMD_RESERVED, PRFCNT_CONTROL_CMD_DISCARD, }; diff --git a/include/uapi/gpu/arm/bifrost/mali_kbase_ioctl.h b/include/uapi/gpu/arm/bifrost/mali_kbase_ioctl.h index 63bf48b603ef..c8a54f91165e 100644 --- a/include/uapi/gpu/arm/bifrost/mali_kbase_ioctl.h +++ b/include/uapi/gpu/arm/bifrost/mali_kbase_ioctl.h @@ -46,8 +46,7 @@ struct kbase_ioctl_set_flags { __u32 create_flags; }; -#define KBASE_IOCTL_SET_FLAGS \ - _IOW(KBASE_IOCTL_TYPE, 1, struct kbase_ioctl_set_flags) +#define KBASE_IOCTL_SET_FLAGS _IOW(KBASE_IOCTL_TYPE, 1, struct kbase_ioctl_set_flags) /** * struct kbase_ioctl_get_gpuprops - Read GPU properties from the kernel @@ -81,8 +80,7 @@ struct kbase_ioctl_get_gpuprops { __u32 flags; }; -#define KBASE_IOCTL_GET_GPUPROPS \ - _IOW(KBASE_IOCTL_TYPE, 3, struct kbase_ioctl_get_gpuprops) +#define KBASE_IOCTL_GET_GPUPROPS _IOW(KBASE_IOCTL_TYPE, 3, struct kbase_ioctl_get_gpuprops) /** * union kbase_ioctl_mem_alloc - Allocate memory on the GPU @@ -108,8 +106,7 @@ union kbase_ioctl_mem_alloc { } out; }; -#define KBASE_IOCTL_MEM_ALLOC \ - _IOWR(KBASE_IOCTL_TYPE, 5, union kbase_ioctl_mem_alloc) +#define KBASE_IOCTL_MEM_ALLOC _IOWR(KBASE_IOCTL_TYPE, 5, union kbase_ioctl_mem_alloc) /** * struct kbase_ioctl_mem_query - Query properties of a GPU memory region @@ -131,12 +128,11 @@ union kbase_ioctl_mem_query { } out; }; -#define KBASE_IOCTL_MEM_QUERY \ - _IOWR(KBASE_IOCTL_TYPE, 6, union kbase_ioctl_mem_query) +#define KBASE_IOCTL_MEM_QUERY _IOWR(KBASE_IOCTL_TYPE, 6, union kbase_ioctl_mem_query) -#define KBASE_MEM_QUERY_COMMIT_SIZE ((__u64)1) -#define KBASE_MEM_QUERY_VA_SIZE ((__u64)2) -#define KBASE_MEM_QUERY_FLAGS ((__u64)3) +#define KBASE_MEM_QUERY_COMMIT_SIZE ((__u64)1) +#define KBASE_MEM_QUERY_VA_SIZE ((__u64)2) +#define KBASE_MEM_QUERY_FLAGS 
((__u64)3) /** * struct kbase_ioctl_mem_free - Free a memory region @@ -146,8 +142,7 @@ struct kbase_ioctl_mem_free { __u64 gpu_addr; }; -#define KBASE_IOCTL_MEM_FREE \ - _IOW(KBASE_IOCTL_TYPE, 7, struct kbase_ioctl_mem_free) +#define KBASE_IOCTL_MEM_FREE _IOW(KBASE_IOCTL_TYPE, 7, struct kbase_ioctl_mem_free) /** * struct kbase_ioctl_hwcnt_reader_setup - Setup HWC dumper/reader @@ -167,7 +162,7 @@ struct kbase_ioctl_hwcnt_reader_setup { __u32 mmu_l2_bm; }; -#define KBASE_IOCTL_HWCNT_READER_SETUP \ +#define KBASE_IOCTL_HWCNT_READER_SETUP \ _IOW(KBASE_IOCTL_TYPE, 8, struct kbase_ioctl_hwcnt_reader_setup) /** @@ -182,8 +177,7 @@ struct kbase_ioctl_hwcnt_values { __u32 padding; }; -#define KBASE_IOCTL_HWCNT_SET \ - _IOW(KBASE_IOCTL_TYPE, 32, struct kbase_ioctl_hwcnt_values) +#define KBASE_IOCTL_HWCNT_SET _IOW(KBASE_IOCTL_TYPE, 32, struct kbase_ioctl_hwcnt_values) /** * struct kbase_ioctl_disjoint_query - Query the disjoint counter @@ -193,8 +187,7 @@ struct kbase_ioctl_disjoint_query { __u32 counter; }; -#define KBASE_IOCTL_DISJOINT_QUERY \ - _IOR(KBASE_IOCTL_TYPE, 12, struct kbase_ioctl_disjoint_query) +#define KBASE_IOCTL_DISJOINT_QUERY _IOR(KBASE_IOCTL_TYPE, 12, struct kbase_ioctl_disjoint_query) /** * struct kbase_ioctl_get_ddk_version - Query the kernel version @@ -215,8 +208,7 @@ struct kbase_ioctl_get_ddk_version { __u32 padding; }; -#define KBASE_IOCTL_GET_DDK_VERSION \ - _IOW(KBASE_IOCTL_TYPE, 13, struct kbase_ioctl_get_ddk_version) +#define KBASE_IOCTL_GET_DDK_VERSION _IOW(KBASE_IOCTL_TYPE, 13, struct kbase_ioctl_get_ddk_version) /** * struct kbase_ioctl_mem_jit_init - Initialize the just-in-time memory @@ -241,8 +233,7 @@ struct kbase_ioctl_mem_jit_init { __u64 phys_pages; }; -#define KBASE_IOCTL_MEM_JIT_INIT \ - _IOW(KBASE_IOCTL_TYPE, 14, struct kbase_ioctl_mem_jit_init) +#define KBASE_IOCTL_MEM_JIT_INIT _IOW(KBASE_IOCTL_TYPE, 14, struct kbase_ioctl_mem_jit_init) /** * struct kbase_ioctl_mem_sync - Perform cache maintenance on memory @@ -262,8 +253,7 @@ 
struct kbase_ioctl_mem_sync { __u8 padding[7]; }; -#define KBASE_IOCTL_MEM_SYNC \ - _IOW(KBASE_IOCTL_TYPE, 15, struct kbase_ioctl_mem_sync) +#define KBASE_IOCTL_MEM_SYNC _IOW(KBASE_IOCTL_TYPE, 15, struct kbase_ioctl_mem_sync) /** * union kbase_ioctl_mem_find_cpu_offset - Find the offset of a CPU pointer @@ -286,7 +276,7 @@ union kbase_ioctl_mem_find_cpu_offset { } out; }; -#define KBASE_IOCTL_MEM_FIND_CPU_OFFSET \ +#define KBASE_IOCTL_MEM_FIND_CPU_OFFSET \ _IOWR(KBASE_IOCTL_TYPE, 16, union kbase_ioctl_mem_find_cpu_offset) /** @@ -298,8 +288,7 @@ struct kbase_ioctl_get_context_id { __u32 id; }; -#define KBASE_IOCTL_GET_CONTEXT_ID \ - _IOR(KBASE_IOCTL_TYPE, 17, struct kbase_ioctl_get_context_id) +#define KBASE_IOCTL_GET_CONTEXT_ID _IOR(KBASE_IOCTL_TYPE, 17, struct kbase_ioctl_get_context_id) /** * struct kbase_ioctl_tlstream_acquire - Acquire a tlstream fd @@ -312,11 +301,9 @@ struct kbase_ioctl_tlstream_acquire { __u32 flags; }; -#define KBASE_IOCTL_TLSTREAM_ACQUIRE \ - _IOW(KBASE_IOCTL_TYPE, 18, struct kbase_ioctl_tlstream_acquire) +#define KBASE_IOCTL_TLSTREAM_ACQUIRE _IOW(KBASE_IOCTL_TYPE, 18, struct kbase_ioctl_tlstream_acquire) -#define KBASE_IOCTL_TLSTREAM_FLUSH \ - _IO(KBASE_IOCTL_TYPE, 19) +#define KBASE_IOCTL_TLSTREAM_FLUSH _IO(KBASE_IOCTL_TYPE, 19) /** * struct kbase_ioctl_mem_commit - Change the amount of memory backing a region @@ -333,8 +320,7 @@ struct kbase_ioctl_mem_commit { __u64 pages; }; -#define KBASE_IOCTL_MEM_COMMIT \ - _IOW(KBASE_IOCTL_TYPE, 20, struct kbase_ioctl_mem_commit) +#define KBASE_IOCTL_MEM_COMMIT _IOW(KBASE_IOCTL_TYPE, 20, struct kbase_ioctl_mem_commit) /** * union kbase_ioctl_mem_alias - Create an alias of memory regions @@ -362,8 +348,7 @@ union kbase_ioctl_mem_alias { } out; }; -#define KBASE_IOCTL_MEM_ALIAS \ - _IOWR(KBASE_IOCTL_TYPE, 21, union kbase_ioctl_mem_alias) +#define KBASE_IOCTL_MEM_ALIAS _IOWR(KBASE_IOCTL_TYPE, 21, union kbase_ioctl_mem_alias) /** * union kbase_ioctl_mem_import - Import memory for use by the GPU @@ 
-391,8 +376,7 @@ union kbase_ioctl_mem_import { } out; }; -#define KBASE_IOCTL_MEM_IMPORT \ - _IOWR(KBASE_IOCTL_TYPE, 22, union kbase_ioctl_mem_import) +#define KBASE_IOCTL_MEM_IMPORT _IOWR(KBASE_IOCTL_TYPE, 22, union kbase_ioctl_mem_import) /** * struct kbase_ioctl_mem_flags_change - Change the flags for a memory region @@ -406,8 +390,7 @@ struct kbase_ioctl_mem_flags_change { __u64 mask; }; -#define KBASE_IOCTL_MEM_FLAGS_CHANGE \ - _IOW(KBASE_IOCTL_TYPE, 23, struct kbase_ioctl_mem_flags_change) +#define KBASE_IOCTL_MEM_FLAGS_CHANGE _IOW(KBASE_IOCTL_TYPE, 23, struct kbase_ioctl_mem_flags_change) /** * struct kbase_ioctl_stream_create - Create a synchronisation stream @@ -424,8 +407,7 @@ struct kbase_ioctl_stream_create { char name[32]; }; -#define KBASE_IOCTL_STREAM_CREATE \ - _IOW(KBASE_IOCTL_TYPE, 24, struct kbase_ioctl_stream_create) +#define KBASE_IOCTL_STREAM_CREATE _IOW(KBASE_IOCTL_TYPE, 24, struct kbase_ioctl_stream_create) /** * struct kbase_ioctl_fence_validate - Validate a fd refers to a fence @@ -435,8 +417,7 @@ struct kbase_ioctl_fence_validate { int fd; }; -#define KBASE_IOCTL_FENCE_VALIDATE \ - _IOW(KBASE_IOCTL_TYPE, 25, struct kbase_ioctl_fence_validate) +#define KBASE_IOCTL_FENCE_VALIDATE _IOW(KBASE_IOCTL_TYPE, 25, struct kbase_ioctl_fence_validate) /** * struct kbase_ioctl_mem_profile_add - Provide profiling information to kernel @@ -452,8 +433,7 @@ struct kbase_ioctl_mem_profile_add { __u32 padding; }; -#define KBASE_IOCTL_MEM_PROFILE_ADD \ - _IOW(KBASE_IOCTL_TYPE, 27, struct kbase_ioctl_mem_profile_add) +#define KBASE_IOCTL_MEM_PROFILE_ADD _IOW(KBASE_IOCTL_TYPE, 27, struct kbase_ioctl_mem_profile_add) /** * struct kbase_ioctl_sticky_resource_map - Permanently map an external resource @@ -465,7 +445,7 @@ struct kbase_ioctl_sticky_resource_map { __u64 address; }; -#define KBASE_IOCTL_STICKY_RESOURCE_MAP \ +#define KBASE_IOCTL_STICKY_RESOURCE_MAP \ _IOW(KBASE_IOCTL_TYPE, 29, struct kbase_ioctl_sticky_resource_map) /** @@ -479,7 +459,7 @@ struct 
kbase_ioctl_sticky_resource_unmap { __u64 address; }; -#define KBASE_IOCTL_STICKY_RESOURCE_UNMAP \ +#define KBASE_IOCTL_STICKY_RESOURCE_UNMAP \ _IOW(KBASE_IOCTL_TYPE, 30, struct kbase_ioctl_sticky_resource_unmap) /** @@ -507,14 +487,12 @@ union kbase_ioctl_mem_find_gpu_start_and_offset { } out; }; -#define KBASE_IOCTL_MEM_FIND_GPU_START_AND_OFFSET \ +#define KBASE_IOCTL_MEM_FIND_GPU_START_AND_OFFSET \ _IOWR(KBASE_IOCTL_TYPE, 31, union kbase_ioctl_mem_find_gpu_start_and_offset) -#define KBASE_IOCTL_CINSTR_GWT_START \ - _IO(KBASE_IOCTL_TYPE, 33) +#define KBASE_IOCTL_CINSTR_GWT_START _IO(KBASE_IOCTL_TYPE, 33) -#define KBASE_IOCTL_CINSTR_GWT_STOP \ - _IO(KBASE_IOCTL_TYPE, 34) +#define KBASE_IOCTL_CINSTR_GWT_STOP _IO(KBASE_IOCTL_TYPE, 34) /** * union kbase_ioctl_cinstr_gwt_dump - Used to collect all GPU write fault @@ -547,8 +525,7 @@ union kbase_ioctl_cinstr_gwt_dump { } out; }; -#define KBASE_IOCTL_CINSTR_GWT_DUMP \ - _IOWR(KBASE_IOCTL_TYPE, 35, union kbase_ioctl_cinstr_gwt_dump) +#define KBASE_IOCTL_CINSTR_GWT_DUMP _IOWR(KBASE_IOCTL_TYPE, 35, union kbase_ioctl_cinstr_gwt_dump) /** * struct kbase_ioctl_mem_exec_init - Initialise the EXEC_VA memory zone @@ -559,8 +536,7 @@ struct kbase_ioctl_mem_exec_init { __u64 va_pages; }; -#define KBASE_IOCTL_MEM_EXEC_INIT \ - _IOW(KBASE_IOCTL_TYPE, 38, struct kbase_ioctl_mem_exec_init) +#define KBASE_IOCTL_MEM_EXEC_INIT _IOW(KBASE_IOCTL_TYPE, 38, struct kbase_ioctl_mem_exec_init) /** * union kbase_ioctl_get_cpu_gpu_timeinfo - Request zero or more types of @@ -589,7 +565,7 @@ union kbase_ioctl_get_cpu_gpu_timeinfo { } out; }; -#define KBASE_IOCTL_GET_CPU_GPU_TIMEINFO \ +#define KBASE_IOCTL_GET_CPU_GPU_TIMEINFO \ _IOWR(KBASE_IOCTL_TYPE, 50, union kbase_ioctl_get_cpu_gpu_timeinfo) /** @@ -601,7 +577,7 @@ struct kbase_ioctl_context_priority_check { __u8 priority; }; -#define KBASE_IOCTL_CONTEXT_PRIORITY_CHECK \ +#define KBASE_IOCTL_CONTEXT_PRIORITY_CHECK \ _IOWR(KBASE_IOCTL_TYPE, 54, struct kbase_ioctl_context_priority_check) /** @@ 
-613,7 +589,7 @@ struct kbase_ioctl_set_limited_core_count { __u8 max_core_count; }; -#define KBASE_IOCTL_SET_LIMITED_CORE_COUNT \ +#define KBASE_IOCTL_SET_LIMITED_CORE_COUNT \ _IOW(KBASE_IOCTL_TYPE, 55, struct kbase_ioctl_set_limited_core_count) /** @@ -634,7 +610,7 @@ struct kbase_ioctl_kinstr_prfcnt_enum_info { __u64 info_list_ptr; }; -#define KBASE_IOCTL_KINSTR_PRFCNT_ENUM_INFO \ +#define KBASE_IOCTL_KINSTR_PRFCNT_ENUM_INFO \ _IOWR(KBASE_IOCTL_TYPE, 56, struct kbase_ioctl_kinstr_prfcnt_enum_info) /** @@ -663,7 +639,7 @@ union kbase_ioctl_kinstr_prfcnt_setup { } out; }; -#define KBASE_IOCTL_KINSTR_PRFCNT_SETUP \ +#define KBASE_IOCTL_KINSTR_PRFCNT_SETUP \ _IOWR(KBASE_IOCTL_TYPE, 57, union kbase_ioctl_kinstr_prfcnt_setup) /*************** @@ -687,8 +663,7 @@ struct kbase_ioctl_tlstream_stats { __u32 bytes_generated; }; -#define KBASE_IOCTL_TLSTREAM_STATS \ - _IOR(KBASE_IOCTL_TEST_TYPE, 2, struct kbase_ioctl_tlstream_stats) +#define KBASE_IOCTL_TLSTREAM_STATS _IOR(KBASE_IOCTL_TEST_TYPE, 2, struct kbase_ioctl_tlstream_stats) #endif /* MALI_UNIT_TEST */ @@ -706,108 +681,107 @@ struct kbase_ioctl_tlstream_stats { * _IOWR(KBASE_IOCTL_EXTRA_TYPE, 0, struct my_ioctl_args) */ - /********************************** * Definitions for GPU properties * **********************************/ -#define KBASE_GPUPROP_VALUE_SIZE_U8 (0x0) -#define KBASE_GPUPROP_VALUE_SIZE_U16 (0x1) -#define KBASE_GPUPROP_VALUE_SIZE_U32 (0x2) -#define KBASE_GPUPROP_VALUE_SIZE_U64 (0x3) +#define KBASE_GPUPROP_VALUE_SIZE_U8 (0x0) +#define KBASE_GPUPROP_VALUE_SIZE_U16 (0x1) +#define KBASE_GPUPROP_VALUE_SIZE_U32 (0x2) +#define KBASE_GPUPROP_VALUE_SIZE_U64 (0x3) -#define KBASE_GPUPROP_PRODUCT_ID 1 -#define KBASE_GPUPROP_VERSION_STATUS 2 -#define KBASE_GPUPROP_MINOR_REVISION 3 -#define KBASE_GPUPROP_MAJOR_REVISION 4 +#define KBASE_GPUPROP_PRODUCT_ID 1 +#define KBASE_GPUPROP_VERSION_STATUS 2 +#define KBASE_GPUPROP_MINOR_REVISION 3 +#define KBASE_GPUPROP_MAJOR_REVISION 4 /* 5 previously used for GPU speed */ 
-#define KBASE_GPUPROP_GPU_FREQ_KHZ_MAX 6 +#define KBASE_GPUPROP_GPU_FREQ_KHZ_MAX 6 /* 7 previously used for minimum GPU speed */ -#define KBASE_GPUPROP_LOG2_PROGRAM_COUNTER_SIZE 8 -#define KBASE_GPUPROP_TEXTURE_FEATURES_0 9 -#define KBASE_GPUPROP_TEXTURE_FEATURES_1 10 -#define KBASE_GPUPROP_TEXTURE_FEATURES_2 11 -#define KBASE_GPUPROP_GPU_AVAILABLE_MEMORY_SIZE 12 +#define KBASE_GPUPROP_LOG2_PROGRAM_COUNTER_SIZE 8 +#define KBASE_GPUPROP_TEXTURE_FEATURES_0 9 +#define KBASE_GPUPROP_TEXTURE_FEATURES_1 10 +#define KBASE_GPUPROP_TEXTURE_FEATURES_2 11 +#define KBASE_GPUPROP_GPU_AVAILABLE_MEMORY_SIZE 12 -#define KBASE_GPUPROP_L2_LOG2_LINE_SIZE 13 -#define KBASE_GPUPROP_L2_LOG2_CACHE_SIZE 14 -#define KBASE_GPUPROP_L2_NUM_L2_SLICES 15 +#define KBASE_GPUPROP_L2_LOG2_LINE_SIZE 13 +#define KBASE_GPUPROP_L2_LOG2_CACHE_SIZE 14 +#define KBASE_GPUPROP_L2_NUM_L2_SLICES 15 -#define KBASE_GPUPROP_TILER_BIN_SIZE_BYTES 16 -#define KBASE_GPUPROP_TILER_MAX_ACTIVE_LEVELS 17 +#define KBASE_GPUPROP_TILER_BIN_SIZE_BYTES 16 +#define KBASE_GPUPROP_TILER_MAX_ACTIVE_LEVELS 17 -#define KBASE_GPUPROP_MAX_THREADS 18 -#define KBASE_GPUPROP_MAX_WORKGROUP_SIZE 19 -#define KBASE_GPUPROP_MAX_BARRIER_SIZE 20 -#define KBASE_GPUPROP_MAX_REGISTERS 21 -#define KBASE_GPUPROP_MAX_TASK_QUEUE 22 -#define KBASE_GPUPROP_MAX_THREAD_GROUP_SPLIT 23 -#define KBASE_GPUPROP_IMPL_TECH 24 +#define KBASE_GPUPROP_MAX_THREADS 18 +#define KBASE_GPUPROP_MAX_WORKGROUP_SIZE 19 +#define KBASE_GPUPROP_MAX_BARRIER_SIZE 20 +#define KBASE_GPUPROP_MAX_REGISTERS 21 +#define KBASE_GPUPROP_MAX_TASK_QUEUE 22 +#define KBASE_GPUPROP_MAX_THREAD_GROUP_SPLIT 23 +#define KBASE_GPUPROP_IMPL_TECH 24 -#define KBASE_GPUPROP_RAW_SHADER_PRESENT 25 -#define KBASE_GPUPROP_RAW_TILER_PRESENT 26 -#define KBASE_GPUPROP_RAW_L2_PRESENT 27 -#define KBASE_GPUPROP_RAW_STACK_PRESENT 28 -#define KBASE_GPUPROP_RAW_L2_FEATURES 29 -#define KBASE_GPUPROP_RAW_CORE_FEATURES 30 -#define KBASE_GPUPROP_RAW_MEM_FEATURES 31 -#define KBASE_GPUPROP_RAW_MMU_FEATURES 32 
-#define KBASE_GPUPROP_RAW_AS_PRESENT 33 -#define KBASE_GPUPROP_RAW_JS_PRESENT 34 -#define KBASE_GPUPROP_RAW_JS_FEATURES_0 35 -#define KBASE_GPUPROP_RAW_JS_FEATURES_1 36 -#define KBASE_GPUPROP_RAW_JS_FEATURES_2 37 -#define KBASE_GPUPROP_RAW_JS_FEATURES_3 38 -#define KBASE_GPUPROP_RAW_JS_FEATURES_4 39 -#define KBASE_GPUPROP_RAW_JS_FEATURES_5 40 -#define KBASE_GPUPROP_RAW_JS_FEATURES_6 41 -#define KBASE_GPUPROP_RAW_JS_FEATURES_7 42 -#define KBASE_GPUPROP_RAW_JS_FEATURES_8 43 -#define KBASE_GPUPROP_RAW_JS_FEATURES_9 44 -#define KBASE_GPUPROP_RAW_JS_FEATURES_10 45 -#define KBASE_GPUPROP_RAW_JS_FEATURES_11 46 -#define KBASE_GPUPROP_RAW_JS_FEATURES_12 47 -#define KBASE_GPUPROP_RAW_JS_FEATURES_13 48 -#define KBASE_GPUPROP_RAW_JS_FEATURES_14 49 -#define KBASE_GPUPROP_RAW_JS_FEATURES_15 50 -#define KBASE_GPUPROP_RAW_TILER_FEATURES 51 -#define KBASE_GPUPROP_RAW_TEXTURE_FEATURES_0 52 -#define KBASE_GPUPROP_RAW_TEXTURE_FEATURES_1 53 -#define KBASE_GPUPROP_RAW_TEXTURE_FEATURES_2 54 -#define KBASE_GPUPROP_RAW_GPU_ID 55 -#define KBASE_GPUPROP_RAW_THREAD_MAX_THREADS 56 -#define KBASE_GPUPROP_RAW_THREAD_MAX_WORKGROUP_SIZE 57 -#define KBASE_GPUPROP_RAW_THREAD_MAX_BARRIER_SIZE 58 -#define KBASE_GPUPROP_RAW_THREAD_FEATURES 59 -#define KBASE_GPUPROP_RAW_COHERENCY_MODE 60 +#define KBASE_GPUPROP_RAW_SHADER_PRESENT 25 +#define KBASE_GPUPROP_RAW_TILER_PRESENT 26 +#define KBASE_GPUPROP_RAW_L2_PRESENT 27 +#define KBASE_GPUPROP_RAW_STACK_PRESENT 28 +#define KBASE_GPUPROP_RAW_L2_FEATURES 29 +#define KBASE_GPUPROP_RAW_CORE_FEATURES 30 +#define KBASE_GPUPROP_RAW_MEM_FEATURES 31 +#define KBASE_GPUPROP_RAW_MMU_FEATURES 32 +#define KBASE_GPUPROP_RAW_AS_PRESENT 33 +#define KBASE_GPUPROP_RAW_JS_PRESENT 34 +#define KBASE_GPUPROP_RAW_JS_FEATURES_0 35 +#define KBASE_GPUPROP_RAW_JS_FEATURES_1 36 +#define KBASE_GPUPROP_RAW_JS_FEATURES_2 37 +#define KBASE_GPUPROP_RAW_JS_FEATURES_3 38 +#define KBASE_GPUPROP_RAW_JS_FEATURES_4 39 +#define KBASE_GPUPROP_RAW_JS_FEATURES_5 40 +#define 
KBASE_GPUPROP_RAW_JS_FEATURES_6 41 +#define KBASE_GPUPROP_RAW_JS_FEATURES_7 42 +#define KBASE_GPUPROP_RAW_JS_FEATURES_8 43 +#define KBASE_GPUPROP_RAW_JS_FEATURES_9 44 +#define KBASE_GPUPROP_RAW_JS_FEATURES_10 45 +#define KBASE_GPUPROP_RAW_JS_FEATURES_11 46 +#define KBASE_GPUPROP_RAW_JS_FEATURES_12 47 +#define KBASE_GPUPROP_RAW_JS_FEATURES_13 48 +#define KBASE_GPUPROP_RAW_JS_FEATURES_14 49 +#define KBASE_GPUPROP_RAW_JS_FEATURES_15 50 +#define KBASE_GPUPROP_RAW_TILER_FEATURES 51 +#define KBASE_GPUPROP_RAW_TEXTURE_FEATURES_0 52 +#define KBASE_GPUPROP_RAW_TEXTURE_FEATURES_1 53 +#define KBASE_GPUPROP_RAW_TEXTURE_FEATURES_2 54 +#define KBASE_GPUPROP_RAW_GPU_ID 55 +#define KBASE_GPUPROP_RAW_THREAD_MAX_THREADS 56 +#define KBASE_GPUPROP_RAW_THREAD_MAX_WORKGROUP_SIZE 57 +#define KBASE_GPUPROP_RAW_THREAD_MAX_BARRIER_SIZE 58 +#define KBASE_GPUPROP_RAW_THREAD_FEATURES 59 +#define KBASE_GPUPROP_RAW_COHERENCY_MODE 60 -#define KBASE_GPUPROP_COHERENCY_NUM_GROUPS 61 -#define KBASE_GPUPROP_COHERENCY_NUM_CORE_GROUPS 62 -#define KBASE_GPUPROP_COHERENCY_COHERENCY 63 -#define KBASE_GPUPROP_COHERENCY_GROUP_0 64 -#define KBASE_GPUPROP_COHERENCY_GROUP_1 65 -#define KBASE_GPUPROP_COHERENCY_GROUP_2 66 -#define KBASE_GPUPROP_COHERENCY_GROUP_3 67 -#define KBASE_GPUPROP_COHERENCY_GROUP_4 68 -#define KBASE_GPUPROP_COHERENCY_GROUP_5 69 -#define KBASE_GPUPROP_COHERENCY_GROUP_6 70 -#define KBASE_GPUPROP_COHERENCY_GROUP_7 71 -#define KBASE_GPUPROP_COHERENCY_GROUP_8 72 -#define KBASE_GPUPROP_COHERENCY_GROUP_9 73 -#define KBASE_GPUPROP_COHERENCY_GROUP_10 74 -#define KBASE_GPUPROP_COHERENCY_GROUP_11 75 -#define KBASE_GPUPROP_COHERENCY_GROUP_12 76 -#define KBASE_GPUPROP_COHERENCY_GROUP_13 77 -#define KBASE_GPUPROP_COHERENCY_GROUP_14 78 -#define KBASE_GPUPROP_COHERENCY_GROUP_15 79 +#define KBASE_GPUPROP_COHERENCY_NUM_GROUPS 61 +#define KBASE_GPUPROP_COHERENCY_NUM_CORE_GROUPS 62 +#define KBASE_GPUPROP_COHERENCY_COHERENCY 63 +#define KBASE_GPUPROP_COHERENCY_GROUP_0 64 +#define 
KBASE_GPUPROP_COHERENCY_GROUP_1 65 +#define KBASE_GPUPROP_COHERENCY_GROUP_2 66 +#define KBASE_GPUPROP_COHERENCY_GROUP_3 67 +#define KBASE_GPUPROP_COHERENCY_GROUP_4 68 +#define KBASE_GPUPROP_COHERENCY_GROUP_5 69 +#define KBASE_GPUPROP_COHERENCY_GROUP_6 70 +#define KBASE_GPUPROP_COHERENCY_GROUP_7 71 +#define KBASE_GPUPROP_COHERENCY_GROUP_8 72 +#define KBASE_GPUPROP_COHERENCY_GROUP_9 73 +#define KBASE_GPUPROP_COHERENCY_GROUP_10 74 +#define KBASE_GPUPROP_COHERENCY_GROUP_11 75 +#define KBASE_GPUPROP_COHERENCY_GROUP_12 76 +#define KBASE_GPUPROP_COHERENCY_GROUP_13 77 +#define KBASE_GPUPROP_COHERENCY_GROUP_14 78 +#define KBASE_GPUPROP_COHERENCY_GROUP_15 79 -#define KBASE_GPUPROP_TEXTURE_FEATURES_3 80 -#define KBASE_GPUPROP_RAW_TEXTURE_FEATURES_3 81 +#define KBASE_GPUPROP_TEXTURE_FEATURES_3 80 +#define KBASE_GPUPROP_RAW_TEXTURE_FEATURES_3 81 -#define KBASE_GPUPROP_NUM_EXEC_ENGINES 82 +#define KBASE_GPUPROP_NUM_EXEC_ENGINES 82 -#define KBASE_GPUPROP_RAW_THREAD_TLS_ALLOC 83 -#define KBASE_GPUPROP_TLS_ALLOC 84 -#define KBASE_GPUPROP_RAW_GPU_FEATURES 85 +#define KBASE_GPUPROP_RAW_THREAD_TLS_ALLOC 83 +#define KBASE_GPUPROP_TLS_ALLOC 84 +#define KBASE_GPUPROP_RAW_GPU_FEATURES 85 #ifdef __cpluscplus } #endif From 26610e54331ffed6c6a1050f9ce72713c53b1ab0 Mon Sep 17 00:00:00 2001 From: Tao Huang Date: Thu, 16 Mar 2023 21:18:25 +0800 Subject: [PATCH 30/79] cpufreq: dt-platdev: Add rk3562 project into blacklist Signed-off-by: Tao Huang Change-Id: Ic2d5eec24279339ac8e53aa690410ce2f30bf449 --- drivers/cpufreq/cpufreq-dt-platdev.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/cpufreq/cpufreq-dt-platdev.c b/drivers/cpufreq/cpufreq-dt-platdev.c index e4fad3bd6b27..da2367dab097 100644 --- a/drivers/cpufreq/cpufreq-dt-platdev.c +++ b/drivers/cpufreq/cpufreq-dt-platdev.c @@ -142,6 +142,7 @@ static const struct of_device_id blacklist[] __initconst = { { .compatible = "rockchip,rk3368", }, { .compatible = "rockchip,rk3399", }, { .compatible = "rockchip,rk3399pro", }, + { .compatible 
= "rockchip,rk3562", }, { .compatible = "rockchip,rk3566", }, { .compatible = "rockchip,rk3568", }, { .compatible = "rockchip,rk3588", }, From 16ecc189f1b89bae2b1878ca8baf16ebf4f2cb40 Mon Sep 17 00:00:00 2001 From: Tao Huang Date: Thu, 16 Mar 2023 21:19:39 +0800 Subject: [PATCH 31/79] cpufreq: dt-platdev: Add rk3528 project into blacklist Signed-off-by: Tao Huang Change-Id: I59e394fb32be430659eaa36bf1924613ab032f5d --- drivers/cpufreq/cpufreq-dt-platdev.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/cpufreq/cpufreq-dt-platdev.c b/drivers/cpufreq/cpufreq-dt-platdev.c index da2367dab097..2243cc9a3c1b 100644 --- a/drivers/cpufreq/cpufreq-dt-platdev.c +++ b/drivers/cpufreq/cpufreq-dt-platdev.c @@ -142,6 +142,7 @@ static const struct of_device_id blacklist[] __initconst = { { .compatible = "rockchip,rk3368", }, { .compatible = "rockchip,rk3399", }, { .compatible = "rockchip,rk3399pro", }, + { .compatible = "rockchip,rk3528", }, { .compatible = "rockchip,rk3562", }, { .compatible = "rockchip,rk3566", }, { .compatible = "rockchip,rk3568", }, From b4789778e77c5e0f87a7332103e7126693a03a6c Mon Sep 17 00:00:00 2001 From: Tao Huang Date: Thu, 16 Mar 2023 21:27:03 +0800 Subject: [PATCH 32/79] arm64: rockchip_gki.config: Enable CONFIG_CPU_RK3562 Signed-off-by: Tao Huang Change-Id: I5f6be242f581321c6da7d532ad379395ca116b6d --- arch/arm64/configs/rockchip_gki.config | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm64/configs/rockchip_gki.config b/arch/arm64/configs/rockchip_gki.config index a8634841707b..7961879eddaa 100644 --- a/arch/arm64/configs/rockchip_gki.config +++ b/arch/arm64/configs/rockchip_gki.config @@ -30,6 +30,7 @@ CONFIG_COMPASS_DEVICE=m CONFIG_CPUFREQ_DT=m CONFIG_CPU_FREQ_GOV_ONDEMAND=m CONFIG_CPU_FREQ_GOV_USERSPACE=m +CONFIG_CPU_RK3562=y CONFIG_CPU_RK3568=y CONFIG_CPU_RK3588=y CONFIG_CRYPTO_AES_ARM64_CE_CCM=m From 0a880404265cc8a7c9f9c3e37d3361d46329c612 Mon Sep 17 00:00:00 2001 From: Finley Xiao Date: Fri, 17 Mar 2023 14:47:04 +0800 
Subject: [PATCH 33/79] arm64: dts: rockchip: rk3562-evb: Enable dmc Signed-off-by: Finley Xiao Change-Id: Ie77aa4dfaf38e0d689899eb8b6cdb0bfc392907e --- arch/arm64/boot/dts/rockchip/rk3562-evb.dtsi | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/arch/arm64/boot/dts/rockchip/rk3562-evb.dtsi b/arch/arm64/boot/dts/rockchip/rk3562-evb.dtsi index 0736756503de..c37634e9e528 100644 --- a/arch/arm64/boot/dts/rockchip/rk3562-evb.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3562-evb.dtsi @@ -137,10 +137,19 @@ cpu-supply = <&vdd_cpu>; }; +&dfi { + status = "okay"; +}; + &display_subsystem { status = "okay"; }; +&dmc { + center-supply = <&vdd_logic>; + status = "okay"; +}; + &dsi { status = "disabled"; //rockchip,lane-rate = <1000>; From 465c984709ac6eb052ed1246f2fd0ad7b83ef628 Mon Sep 17 00:00:00 2001 From: Tao Huang Date: Fri, 17 Mar 2023 16:26:48 +0800 Subject: [PATCH 34/79] arm64: dts: rockchip: rk3562-evb: Sort the nodes alphabetically Signed-off-by: Tao Huang Change-Id: I7572489700df436b2c186746937db584c1daea22 --- arch/arm64/boot/dts/rockchip/rk3562-evb.dtsi | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/arch/arm64/boot/dts/rockchip/rk3562-evb.dtsi b/arch/arm64/boot/dts/rockchip/rk3562-evb.dtsi index c37634e9e528..1e5aa02153b4 100644 --- a/arch/arm64/boot/dts/rockchip/rk3562-evb.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3562-evb.dtsi @@ -531,6 +531,14 @@ status = "okay"; }; +&rga2 { + status = "okay"; +}; + +&rga2_mmu { + status = "okay"; +}; + &rknpu { rknpu-supply = <&vdd_npu>; status = "okay"; @@ -540,14 +548,6 @@ status = "okay"; }; -&rga2 { - status = "okay"; -}; - -&rga2_mmu { - status = "okay"; -}; - &rkvdec { status = "okay"; }; From 0d66d4c2841b3b0208e841183278e7d38ddab43b Mon Sep 17 00:00:00 2001 From: Finley Xiao Date: Fri, 17 Mar 2023 14:50:42 +0800 Subject: [PATCH 35/79] arm64: dts: rockchip: rk3562-rk817-tablet-v10: Enable dmc Signed-off-by: Finley Xiao Change-Id: I2f3f1a5aada9c10d3e07d6367b342064c841c439 --- 
arch/arm64/boot/dts/rockchip/rk3562-rk817-tablet-v10.dts | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/arch/arm64/boot/dts/rockchip/rk3562-rk817-tablet-v10.dts b/arch/arm64/boot/dts/rockchip/rk3562-rk817-tablet-v10.dts index cf0e27a0bba2..e33d8fba2734 100644 --- a/arch/arm64/boot/dts/rockchip/rk3562-rk817-tablet-v10.dts +++ b/arch/arm64/boot/dts/rockchip/rk3562-rk817-tablet-v10.dts @@ -209,10 +209,19 @@ cpu-supply = <&vdd_cpu>; }; +&dfi { + status = "okay"; +}; + &display_subsystem { status = "okay"; }; +&dmc { + center-supply = <&vdd_logic>; + status = "okay"; +}; + &dsi { status = "okay"; From aa8ef2bf5025ab8da2a2ce46a5602913fcf90d1a Mon Sep 17 00:00:00 2001 From: Tao Huang Date: Fri, 17 Mar 2023 16:46:26 +0800 Subject: [PATCH 36/79] arm64: dts: rockchip: rk3562-rk817-tablet-v10: Sort the nodes alphabetically Signed-off-by: Tao Huang Change-Id: I9bb4a6125c16b144f9105c3d84c4e6b10c2794d1 --- .../dts/rockchip/rk3562-rk817-tablet-v10.dts | 39 +++++++++---------- 1 file changed, 19 insertions(+), 20 deletions(-) diff --git a/arch/arm64/boot/dts/rockchip/rk3562-rk817-tablet-v10.dts b/arch/arm64/boot/dts/rockchip/rk3562-rk817-tablet-v10.dts index e33d8fba2734..4f9edc072df7 100644 --- a/arch/arm64/boot/dts/rockchip/rk3562-rk817-tablet-v10.dts +++ b/arch/arm64/boot/dts/rockchip/rk3562-rk817-tablet-v10.dts @@ -853,16 +853,15 @@ }; }; -&sai0 { +&jpegd { status = "okay"; - pinctrl-names = "default"; - pinctrl-0 = <&i2s0m0_lrck - &i2s0m0_sclk - &i2s0m0_sdi0 - &i2s0m0_sdo0>; }; -&video_phy { +&jpegd_mmu { + status = "okay"; +}; + +&mpp_srv { status = "okay"; }; @@ -915,19 +914,6 @@ }; }; - -&jpegd { - status = "okay"; -}; - -&jpegd_mmu { - status = "okay"; -}; - -&mpp_srv { - status = "okay"; -}; - &pwm5 { status = "okay"; }; @@ -968,6 +954,15 @@ status = "okay"; }; +&sai0 { + status = "okay"; + pinctrl-names = "default"; + pinctrl-0 = <&i2s0m0_lrck + &i2s0m0_sclk + &i2s0m0_sdi0 + &i2s0m0_sdo0>; +}; + &saradc0 { status = "okay"; vref-supply = <&vcc_1v8>; @@ 
-1052,6 +1047,10 @@ snps,usb2-lpm-disable; }; +&video_phy { + status = "okay"; +}; + &vop { status = "okay"; }; From cdc0984c90dc81b891a9576aa6462d3ca0a04a21 Mon Sep 17 00:00:00 2001 From: Algea Cao Date: Fri, 17 Mar 2023 15:32:25 +0800 Subject: [PATCH 37/79] drm/bridge: synopsys: dw-hdmi-qp: Support read ext block edid Signed-off-by: Algea Cao Change-Id: I4dbf6890032953e109863a86deb38fc1094e6938 --- drivers/gpu/drm/bridge/synopsys/dw-hdmi-qp.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/bridge/synopsys/dw-hdmi-qp.c b/drivers/gpu/drm/bridge/synopsys/dw-hdmi-qp.c index 071709d1848d..c08ec5cfb307 100644 --- a/drivers/gpu/drm/bridge/synopsys/dw-hdmi-qp.c +++ b/drivers/gpu/drm/bridge/synopsys/dw-hdmi-qp.c @@ -962,8 +962,12 @@ static int dw_hdmi_i2c_read(struct dw_hdmi_qp *hdmi, hdmi_modb(hdmi, i2c->slave_reg++ << 12, I2CM_ADDR, I2CM_INTERFACE_CONTROL0); - hdmi_modb(hdmi, I2CM_FM_READ, I2CM_WR_MASK, - I2CM_INTERFACE_CONTROL0); + if (i2c->is_segment) + hdmi_modb(hdmi, I2CM_EXT_READ, I2CM_WR_MASK, + I2CM_INTERFACE_CONTROL0); + else + hdmi_modb(hdmi, I2CM_FM_READ, I2CM_WR_MASK, + I2CM_INTERFACE_CONTROL0); stat = wait_for_completion_timeout(&i2c->cmp, HZ / 10); if (!stat) { @@ -1086,7 +1090,7 @@ static int dw_hdmi_i2c_xfer(struct i2c_adapter *adap, i2c->is_segment = true; hdmi_modb(hdmi, DDC_SEGMENT_ADDR, I2CM_SEG_ADDR, I2CM_INTERFACE_CONTROL1); - hdmi_modb(hdmi, *msgs[i].buf, I2CM_SEG_PTR, + hdmi_modb(hdmi, *msgs[i].buf << 7, I2CM_SEG_PTR, I2CM_INTERFACE_CONTROL1); } else { if (msgs[i].flags & I2C_M_RD) From 80fa2be8ed7af968ac9328f2081b17cffded29d0 Mon Sep 17 00:00:00 2001 From: Zefa Chen Date: Fri, 10 Mar 2023 18:49:13 +0800 Subject: [PATCH 38/79] media: rockchip: vicap refresh cache before memcpy for tools video Signed-off-by: Zefa Chen Change-Id: I3cfddddd41fe37e9a04547e52abeb337b556aeed --- drivers/media/platform/rockchip/cif/capture.c | 4 ++-- drivers/media/platform/rockchip/cif/cif-tools.c | 10 +++++++--- 
drivers/media/platform/rockchip/cif/dev.h | 3 +++ 3 files changed, 12 insertions(+), 5 deletions(-) diff --git a/drivers/media/platform/rockchip/cif/capture.c b/drivers/media/platform/rockchip/cif/capture.c index d6055b0c8828..4bd1a244ab5f 100644 --- a/drivers/media/platform/rockchip/cif/capture.c +++ b/drivers/media/platform/rockchip/cif/capture.c @@ -6502,8 +6502,8 @@ static const struct v4l2_ioctl_ops rkcif_v4l2_ioctl_ops = { .vidioc_default = rkcif_ioctl_default, }; -static void rkcif_vb_done_oneframe(struct rkcif_stream *stream, - struct vb2_v4l2_buffer *vb_done) +void rkcif_vb_done_oneframe(struct rkcif_stream *stream, + struct vb2_v4l2_buffer *vb_done) { const struct cif_output_fmt *fmt = stream->cif_fmt_out; u32 i; diff --git a/drivers/media/platform/rockchip/cif/cif-tools.c b/drivers/media/platform/rockchip/cif/cif-tools.c index 5c212c034657..dedd26641ab5 100644 --- a/drivers/media/platform/rockchip/cif/cif-tools.c +++ b/drivers/media/platform/rockchip/cif/cif-tools.c @@ -636,21 +636,21 @@ retry_done_buf: if (tools_vdev->stopping) { rkcif_tools_stop(tools_vdev); tools_vdev->stopping = false; - rkcif_vb_done_tasklet(stream, buf); + rkcif_vb_done_oneframe(stream, &buf->vb); spin_lock_irqsave(&tools_vdev->vbq_lock, flags); while (!list_empty(&tools_vdev->buf_done_head)) { buf = list_first_entry(&tools_vdev->buf_done_head, struct rkcif_buffer, queue); if (buf) { list_del(&buf->queue); - rkcif_vb_done_tasklet(stream, buf); + rkcif_vb_done_oneframe(stream, &buf->vb); } } spin_unlock_irqrestore(&tools_vdev->vbq_lock, flags); wake_up(&tools_vdev->wq_stopped); return; } - rkcif_vb_done_tasklet(stream, buf); + rkcif_vb_done_oneframe(stream, &buf->vb); if (!list_empty(&tools_vdev->buf_head)) { tools_vdev->curr_buf = list_first_entry(&tools_vdev->buf_head, @@ -675,6 +675,10 @@ retry_done_buf: if (!src || !dst) break; + + if (buf->vb.vb2_buf.vb2_queue->mem_ops->finish) + buf->vb.vb2_buf.vb2_queue->mem_ops->finish(buf->vb.vb2_buf.planes[i].mem_priv); + 
vb2_set_plane_payload(&tools_vdev->curr_buf->vb.vb2_buf, i, payload_size); memcpy(dst, src, payload_size); diff --git a/drivers/media/platform/rockchip/cif/dev.h b/drivers/media/platform/rockchip/cif/dev.h index 333bb90ec033..f50857c87225 100644 --- a/drivers/media/platform/rockchip/cif/dev.h +++ b/drivers/media/platform/rockchip/cif/dev.h @@ -884,6 +884,9 @@ void rkcif_config_dvp_clk_sampling_edge(struct rkcif_device *dev, void rkcif_enable_dvp_clk_dual_edge(struct rkcif_device *dev, bool on); void rkcif_reset_work(struct work_struct *work); +void rkcif_vb_done_oneframe(struct rkcif_stream *stream, + struct vb2_v4l2_buffer *vb_done); + int rkcif_init_rx_buf(struct rkcif_stream *stream, int buf_num); void rkcif_free_rx_buf(struct rkcif_stream *stream, int buf_num); From 7201f0928f0b5a06ee9ea9728fc221f940af1a38 Mon Sep 17 00:00:00 2001 From: Zefa Chen Date: Fri, 10 Mar 2023 18:00:15 +0800 Subject: [PATCH 39/79] media: rockchip: vicap fixes error of overflow when stop stream some sensor driver not support get vblank, may cause error state Signed-off-by: Zefa Chen Change-Id: I04b869026d262283e055c82b1e9d53233bab3b67 --- drivers/media/platform/rockchip/cif/capture.c | 36 ++++++++++--------- 1 file changed, 19 insertions(+), 17 deletions(-) diff --git a/drivers/media/platform/rockchip/cif/capture.c b/drivers/media/platform/rockchip/cif/capture.c index 4bd1a244ab5f..349f92322d1c 100644 --- a/drivers/media/platform/rockchip/cif/capture.c +++ b/drivers/media/platform/rockchip/cif/capture.c @@ -4386,29 +4386,31 @@ void rkcif_do_stop_stream(struct rkcif_stream *stream, stream->id, stream->cur_stream_mode, mode); if (mode == stream->cur_stream_mode) { - stream->stopping = true; if (stream->dma_en) { if (!dev->sensor_linetime) dev->sensor_linetime = rkcif_get_linetime(stream); vblank = rkcif_get_sensor_vblank(dev); - frame_time_ns = (vblank + dev->terminal_sensor.raw_rect.height) * - dev->sensor_linetime; - spin_lock_irqsave(&stream->fps_lock, flags); - fs_time = 
stream->readout.fs_timestamp; - spin_unlock_irqrestore(&stream->fps_lock, flags); - cur_time = ktime_get_ns(); - if (cur_time > fs_time && - cur_time - fs_time < (frame_time_ns - 10000000)) { - spin_lock_irqsave(&stream->vbq_lock, flags); - if (stream->dma_en & RKCIF_DMAEN_BY_VICAP) - stream->to_stop_dma = RKCIF_DMAEN_BY_VICAP; - else if (stream->dma_en & RKCIF_DMAEN_BY_ISP) - stream->to_stop_dma = RKCIF_DMAEN_BY_ISP; - stream->is_stop_capture = true; - rkcif_stop_dma_capture(stream); - spin_unlock_irqrestore(&stream->vbq_lock, flags); + if (vblank) { + frame_time_ns = (vblank + dev->terminal_sensor.raw_rect.height) * + dev->sensor_linetime; + spin_lock_irqsave(&stream->fps_lock, flags); + fs_time = stream->readout.fs_timestamp; + spin_unlock_irqrestore(&stream->fps_lock, flags); + cur_time = ktime_get_ns(); + if (cur_time > fs_time && + cur_time - fs_time < (frame_time_ns - 10000000)) { + spin_lock_irqsave(&stream->vbq_lock, flags); + if (stream->dma_en & RKCIF_DMAEN_BY_VICAP) + stream->to_stop_dma = RKCIF_DMAEN_BY_VICAP; + else if (stream->dma_en & RKCIF_DMAEN_BY_ISP) + stream->to_stop_dma = RKCIF_DMAEN_BY_ISP; + stream->is_stop_capture = true; + rkcif_stop_dma_capture(stream); + spin_unlock_irqrestore(&stream->vbq_lock, flags); + } } } + stream->stopping = true; ret = wait_event_timeout(stream->wq_stopped, stream->state != RKCIF_STATE_STREAMING, msecs_to_jiffies(500)); From aff37bd786d022f7143455a540392216a386676a Mon Sep 17 00:00:00 2001 From: Zefa Chen Date: Wed, 22 Feb 2023 14:25:41 +0800 Subject: [PATCH 40/79] arm64: dts: rockchip: rk3588: modify vicap/csi2 clk/rst Signed-off-by: Zefa Chen Change-Id: I7bac029766591565590e3ca98c7348f53d21b0c6 --- arch/arm64/boot/dts/rockchip/rk3588.dtsi | 8 ++--- arch/arm64/boot/dts/rockchip/rk3588s.dtsi | 37 ++++++++++++++--------- 2 files changed, 26 insertions(+), 19 deletions(-) diff --git a/arch/arm64/boot/dts/rockchip/rk3588.dtsi b/arch/arm64/boot/dts/rockchip/rk3588.dtsi index 51cea98520ce..bc61ede3db6c 100644 --- 
a/arch/arm64/boot/dts/rockchip/rk3588.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3588.dtsi @@ -199,8 +199,8 @@ interrupt-names = "csi-intr1", "csi-intr2"; clocks = <&cru PCLK_CSI_HOST_4>; clock-names = "pclk_csi2host"; - resets = <&cru SRST_P_CSI_HOST_4>, <&cru SRST_CSIHOST4_VICAP>; - reset-names = "srst_csihost_p", "srst_csihost_vicap"; + resets = <&cru SRST_P_CSI_HOST_4>; + reset-names = "srst_csihost_p"; status = "disabled"; }; @@ -213,8 +213,8 @@ interrupt-names = "csi-intr1", "csi-intr2"; clocks = <&cru PCLK_CSI_HOST_5>; clock-names = "pclk_csi2host"; - resets = <&cru SRST_P_CSI_HOST_5>, <&cru SRST_CSIHOST5_VICAP>; - reset-names = "srst_csihost_p", "srst_csihost_vicap"; + resets = <&cru SRST_P_CSI_HOST_5>; + reset-names = "srst_csihost_p"; status = "disabled"; }; diff --git a/arch/arm64/boot/dts/rockchip/rk3588s.dtsi b/arch/arm64/boot/dts/rockchip/rk3588s.dtsi index 35e456b46f72..592965e38ad3 100644 --- a/arch/arm64/boot/dts/rockchip/rk3588s.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3588s.dtsi @@ -3769,10 +3769,17 @@ reg-names = "cif_regs"; interrupts = ; interrupt-names = "cif-intr"; - clocks = <&cru ACLK_VICAP>, <&cru HCLK_VICAP>, <&cru DCLK_VICAP>; - clock-names = "aclk_cif", "hclk_cif", "dclk_cif"; - resets = <&cru SRST_A_VICAP>, <&cru SRST_H_VICAP>, <&cru SRST_D_VICAP>; - reset-names = "rst_cif_a", "rst_cif_h", "rst_cif_d"; + clocks = <&cru ACLK_VICAP>, <&cru HCLK_VICAP>, <&cru DCLK_VICAP>, + <&cru ICLK_CSIHOST0>, <&cru ICLK_CSIHOST1>; + clock-names = "aclk_cif", "hclk_cif", "dclk_cif", + "iclk_host0", "iclk_host1"; + resets = <&cru SRST_A_VICAP>, <&cru SRST_H_VICAP>, <&cru SRST_D_VICAP>, + <&cru SRST_CSIHOST0_VICAP>, <&cru SRST_CSIHOST1_VICAP>, + <&cru SRST_CSIHOST2_VICAP>, <&cru SRST_CSIHOST3_VICAP>, + <&cru SRST_CSIHOST4_VICAP>, <&cru SRST_CSIHOST5_VICAP>; + reset-names = "rst_cif_a", "rst_cif_h", "rst_cif_d", + "rst_cif_host0", "rst_cif_host1", "rst_cif_host2", + "rst_cif_host3", "rst_cif_host4", "rst_cif_host5"; assigned-clocks = <&cru DCLK_VICAP>; 
assigned-clock-rates = <600000000>; power-domains = <&power RK3588_PD_VI>; @@ -3802,10 +3809,10 @@ interrupts = , ; interrupt-names = "csi-intr1", "csi-intr2"; - clocks = <&cru PCLK_CSI_HOST_0>, <&cru ICLK_CSIHOST0>; - clock-names = "pclk_csi2host", "iclk_csi2host"; - resets = <&cru SRST_P_CSI_HOST_0>, <&cru SRST_CSIHOST0_VICAP>; - reset-names = "srst_csihost_p", "srst_csihost_vicap"; + clocks = <&cru PCLK_CSI_HOST_0>; + clock-names = "pclk_csi2host"; + resets = <&cru SRST_P_CSI_HOST_0>; + reset-names = "srst_csihost_p"; status = "disabled"; }; @@ -3816,9 +3823,9 @@ interrupts = , ; interrupt-names = "csi-intr1", "csi-intr2"; - clocks = <&cru PCLK_CSI_HOST_1>, <&cru ICLK_CSIHOST1>; - clock-names = "pclk_csi2host", "iclk_csi2host"; - resets = <&cru SRST_P_CSI_HOST_1>, <&cru SRST_CSIHOST1_VICAP>; + clocks = <&cru PCLK_CSI_HOST_1>; + clock-names = "pclk_csi2host"; + resets = <&cru SRST_P_CSI_HOST_1>; reset-names = "srst_csihost_p", "srst_csihost_vicap"; status = "disabled"; }; @@ -3832,8 +3839,8 @@ interrupt-names = "csi-intr1", "csi-intr2"; clocks = <&cru PCLK_CSI_HOST_2>; clock-names = "pclk_csi2host"; - resets = <&cru SRST_P_CSI_HOST_2>, <&cru SRST_CSIHOST2_VICAP>; - reset-names = "srst_csihost_p", "srst_csihost_vicap"; + resets = <&cru SRST_P_CSI_HOST_2>; + reset-names = "srst_csihost_p"; status = "disabled"; }; @@ -3846,8 +3853,8 @@ interrupt-names = "csi-intr1", "csi-intr2"; clocks = <&cru PCLK_CSI_HOST_3>; clock-names = "pclk_csi2host"; - resets = <&cru SRST_P_CSI_HOST_3>, <&cru SRST_CSIHOST3_VICAP>; - reset-names = "srst_csihost_p", "srst_csihost_vicap"; + resets = <&cru SRST_P_CSI_HOST_3>; + reset-names = "srst_csihost_p"; status = "disabled"; }; From 43526c2435a78fddc02873b1bdc4ac4b0443af50 Mon Sep 17 00:00:00 2001 From: Zefa Chen Date: Wed, 22 Feb 2023 14:28:02 +0800 Subject: [PATCH 41/79] media: rockchip: vicap: rk3588 add iclk_host and rst_host If they are not controlled together, the vicap CRU reset may fail. Signed-off-by: Zefa Chen Change-Id: 
Ie8cc81688ab3c0d1d91b8aa27992b028f8aee80c --- drivers/media/platform/rockchip/cif/hw.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/media/platform/rockchip/cif/hw.c b/drivers/media/platform/rockchip/cif/hw.c index 5724ffa81eb9..4fcb25925a0e 100644 --- a/drivers/media/platform/rockchip/cif/hw.c +++ b/drivers/media/platform/rockchip/cif/hw.c @@ -604,12 +604,20 @@ static const char * const rk3588_cif_clks[] = { "aclk_cif", "hclk_cif", "dclk_cif", + "iclk_host0", + "iclk_host1", }; static const char * const rk3588_cif_rsts[] = { "rst_cif_a", "rst_cif_h", "rst_cif_d", + "rst_cif_host0", + "rst_cif_host1", + "rst_cif_host2", + "rst_cif_host3", + "rst_cif_host4", + "rst_cif_host5", }; static const struct cif_reg rk3588_cif_regs[] = { From 9aa8154c250231396d4091d8660147694c249f3d Mon Sep 17 00:00:00 2001 From: Yandong Lin Date: Thu, 16 Mar 2023 17:17:54 +0800 Subject: [PATCH 42/79] video: rockchip: mpp: fix some buf cache not sync issue Signed-off-by: Yandong Lin Change-Id: I3c41ca6e373480e3b2759154ca4445318c7365b6 --- drivers/video/rockchip/mpp/mpp_rkvenc2.c | 8 +++----- drivers/video/rockchip/mpp/mpp_vepu2.c | 6 ++---- 2 files changed, 5 insertions(+), 9 deletions(-) diff --git a/drivers/video/rockchip/mpp/mpp_rkvenc2.c b/drivers/video/rockchip/mpp/mpp_rkvenc2.c index 616e511c5c92..b3edf3c33525 100644 --- a/drivers/video/rockchip/mpp/mpp_rkvenc2.c +++ b/drivers/video/rockchip/mpp/mpp_rkvenc2.c @@ -958,11 +958,9 @@ static void *rkvenc_alloc_task(struct mpp_session *session, struct mpp_dma_buffer *bs_buf = mpp_dma_find_buffer_fd(session->dma, fd_bs); - if (bs_buf && task->offset_bs > 0) { - mpp_dma_buf_sync(bs_buf, 0, task->offset_bs, - DMA_TO_DEVICE, false); - task->bs_buf = bs_buf; - } + if (bs_buf && task->offset_bs > 0) + mpp_dma_buf_sync(bs_buf, 0, task->offset_bs, DMA_TO_DEVICE, false); + task->bs_buf = bs_buf; } } rkvenc2_setup_task_id(session->index, task); diff --git a/drivers/video/rockchip/mpp/mpp_vepu2.c 
b/drivers/video/rockchip/mpp/mpp_vepu2.c index 1dbff8bb733b..2c1f775ba95c 100644 --- a/drivers/video/rockchip/mpp/mpp_vepu2.c +++ b/drivers/video/rockchip/mpp/mpp_vepu2.c @@ -203,11 +203,9 @@ static int vepu_process_reg_fd(struct mpp_session *session, struct mpp_dma_buffer *bs_buf = mpp_dma_find_buffer_fd(session->dma, fd_bs); task->offset_bs = mpp_query_reg_offset_info(&task->off_inf, VEPU2_REG_OUT_INDEX); - if (bs_buf && task->offset_bs > 0) { + if (bs_buf && task->offset_bs > 0) mpp_dma_buf_sync(bs_buf, 0, task->offset_bs, DMA_TO_DEVICE, false); - task->bs_buf = bs_buf; - } - + task->bs_buf = bs_buf; } return 0; From b278cacd807ea65a58f22d092be22d938aed0a29 Mon Sep 17 00:00:00 2001 From: Yandong Lin Date: Fri, 17 Mar 2023 11:02:35 +0800 Subject: [PATCH 43/79] video: rockchip: mpp: fix access null point in no mmu case Signed-off-by: Yandong Lin Change-Id: I02445a25b68b2ad8e80b46dc5ffc44ef422ab616 --- drivers/video/rockchip/mpp/mpp_iommu.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/video/rockchip/mpp/mpp_iommu.c b/drivers/video/rockchip/mpp/mpp_iommu.c index 0b9c8483801d..dd6a7473480d 100644 --- a/drivers/video/rockchip/mpp/mpp_iommu.c +++ b/drivers/video/rockchip/mpp/mpp_iommu.c @@ -589,6 +589,9 @@ int mpp_iommu_dev_activate(struct mpp_iommu_info *info, struct mpp_dev *dev) unsigned long flags; int ret = 0; + if (!info) + return 0; + spin_lock_irqsave(&info->dev_lock, flags); if (info->dev_active || !dev) { @@ -614,6 +617,9 @@ int mpp_iommu_dev_deactivate(struct mpp_iommu_info *info, struct mpp_dev *dev) { unsigned long flags; + if (!info) + return 0; + spin_lock_irqsave(&info->dev_lock, flags); if (info->dev_active != dev) From 6b3b4b544ebf6ef504794f99328e1377a9eedfd0 Mon Sep 17 00:00:00 2001 From: Sandy Huang Date: Fri, 17 Mar 2023 14:57:50 +0800 Subject: [PATCH 44/79] drm/rockchip: vop3: remove vp1 supported from rk3562 Signed-off-by: Sandy Huang Change-Id: I334a2d693291c72143bd4cd2d8de38ddba1116f3 --- 
drivers/gpu/drm/rockchip/rockchip_vop2_reg.c | 15 +-------------- 1 file changed, 1 insertion(+), 14 deletions(-) diff --git a/drivers/gpu/drm/rockchip/rockchip_vop2_reg.c b/drivers/gpu/drm/rockchip/rockchip_vop2_reg.c index 05d9f1e052ee..d9aae6a98560 100644 --- a/drivers/gpu/drm/rockchip/rockchip_vop2_reg.c +++ b/drivers/gpu/drm/rockchip/rockchip_vop2_reg.c @@ -1108,19 +1108,6 @@ static const struct vop2_video_port_data rk3562_vop_video_ports[] = { .regs = &rk3562_vop_vp0_regs, .ovl_regs = &rk3528_vop_vp0_ovl_regs, }, - { - .id = 1, - .soc_id = { 0x3562, 0x3562 }, - .lut_dma_rid = 14, - .feature = VOP_FEATURE_ALPHA_SCALE | VOP_FEATURE_OVERSCAN, - .gamma_lut_len = 1024, - .max_output = { 2048, 4096 }, - .win_dly = 8, - .layer_mix_dly = 8, - .intr = &rk3568_vp1_intr, - .regs = &rk3562_vop_vp1_regs, - .ovl_regs = &rk3528_vop_vp1_ovl_regs, - }, }; static const struct vop2_video_port_regs rk3568_vop_vp0_regs = { @@ -3764,7 +3751,7 @@ static const struct vop2_data rk3528_vop = { static const struct vop2_data rk3562_vop = { .version = VOP_VERSION_RK3562, - .nr_vps = 2, + .nr_vps = ARRAY_SIZE(rk3562_vop_video_ports), .nr_mixers = 3, .nr_layers = 4, .nr_gammas = 2, From 68c35de1b637abc53998f409d572e37ef034d433 Mon Sep 17 00:00:00 2001 From: Tao Huang Date: Fri, 17 Mar 2023 18:19:28 +0800 Subject: [PATCH 45/79] arm64: dts: rockchip: rk3528-evb1-ddr4-v10: Sort the nodes alphabetically Signed-off-by: Tao Huang Change-Id: Ib883bfc269906460b1fa93112920cbeea4180ac2 --- arch/arm64/boot/dts/rockchip/rk3528-evb1-ddr4-v10.dtsi | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/arm64/boot/dts/rockchip/rk3528-evb1-ddr4-v10.dtsi b/arch/arm64/boot/dts/rockchip/rk3528-evb1-ddr4-v10.dtsi index 73ed29be46d5..6aa12f9d1667 100644 --- a/arch/arm64/boot/dts/rockchip/rk3528-evb1-ddr4-v10.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3528-evb1-ddr4-v10.dtsi @@ -49,10 +49,6 @@ status = "okay"; }; -&i2c6 { - status = "okay"; -}; - &gmac1 { /* Use rgmii-rxid mode to disable 
rx delay inside Soc */ phy-mode = "rgmii-rxid"; @@ -77,6 +73,10 @@ status = "okay"; }; +&i2c6 { + status = "okay"; +}; + &mdio1 { rgmii_phy: phy@1 { compatible = "ethernet-phy-ieee802.3-c22"; From 0eead2352fed2922cebba28dbd6b7f7efcdf5e08 Mon Sep 17 00:00:00 2001 From: Yandong Lin Date: Thu, 8 Sep 2022 09:34:52 +0800 Subject: [PATCH 46/79] arm64: dts: rockchip: rkvdec add hw ccu mode for rk3588 Signed-off-by: Yandong Lin Change-Id: I926f3afb22cce40ba4cd884ea4f96a2e2f420b66 --- arch/arm64/boot/dts/rockchip/rk3588s.dtsi | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/arch/arm64/boot/dts/rockchip/rk3588s.dtsi b/arch/arm64/boot/dts/rockchip/rk3588s.dtsi index 592965e38ad3..07fb66eaa8c0 100644 --- a/arch/arm64/boot/dts/rockchip/rk3588s.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3588s.dtsi @@ -47,6 +47,8 @@ rkcif_mipi_lvds1= &rkcif_mipi_lvds1; rkcif_mipi_lvds2= &rkcif_mipi_lvds2; rkcif_mipi_lvds3= &rkcif_mipi_lvds3; + rkvdec0 = &rkvdec0; + rkvdec1 = &rkvdec1; rkvenc0 = &rkvenc0; rkvenc1 = &rkvenc1; jpege0 = &jpege0; @@ -3472,6 +3474,8 @@ resets = <&cru SRST_A_RKVDEC_CCU>; reset-names = "video_ccu"; rockchip,skip-pmu-idle-request; + /* 1: soft ccu 2: hw ccu */ + rockchip,ccu-mode = <1>; power-domains = <&power RK3588_PD_RKVDEC0>; status = "disabled"; }; @@ -3501,10 +3505,14 @@ rockchip,srv = <&mpp_srv>; rockchip,ccu = <&rkvdec_ccu>; rockchip,core-mask = <0x00010001>; + rockchip,task-capacity = <16>; rockchip,taskqueue-node = <9>; rockchip,sram = <&rkvdec0_sram>; /* rcb_iova: start and size 1M@4095M */ rockchip,rcb-iova = <0xFFF00000 0x100000>; + rockchip,rcb-info = <136 24576>, <137 49152>, <141 90112>, <140 49152>, + <139 180224>, <133 49152>, <134 8192>, <135 4352>, + <138 13056>, <142 291584>; rockchip,rcb-min-width = <512>; power-domains = <&power RK3588_PD_RKVDEC0>; status = "disabled"; @@ -3551,10 +3559,14 @@ rockchip,srv = <&mpp_srv>; rockchip,ccu = <&rkvdec_ccu>; rockchip,core-mask = <0x00020002>; + rockchip,task-capacity = <16>; 
rockchip,taskqueue-node = <9>; rockchip,sram = <&rkvdec1_sram>; /* rcb_iova: start and size 1M@4094M */ rockchip,rcb-iova = <0xFFE00000 0x100000>; + rockchip,rcb-info = <136 24576>, <137 49152>, <141 90112>, <140 49152>, + <139 180224>, <133 49152>, <134 8192>, <135 4352>, + <138 13056>, <142 291584>; rockchip,rcb-min-width = <512>; power-domains = <&power RK3588_PD_RKVDEC1>; status = "disabled"; From e9490005011ecd63cdc5fc82d03d6443993dd887 Mon Sep 17 00:00:00 2001 From: Ding Wei Date: Wed, 26 Jan 2022 09:58:27 +0800 Subject: [PATCH 47/79] video: rockchip: mpp: rkvdec2: Add hard-ccu mode hard-ccu mode: 1. ccu hardware assigns the tasks to each core. 2. if one core errors, wait for both cores to be idle, then reset both cores and the ccu. 3. re-run tasks which have not finished. Change-Id: I3476c0de21de89019ce924122a85ab4352faafd0 Signed-off-by: Ding Wei --- drivers/video/rockchip/mpp/mpp_common.c | 14 +- drivers/video/rockchip/mpp/mpp_common.h | 2 + drivers/video/rockchip/mpp/mpp_rkvdec2.c | 79 +- drivers/video/rockchip/mpp/mpp_rkvdec2.h | 10 + drivers/video/rockchip/mpp/mpp_rkvdec2_link.c | 731 +++++++++++++++++- drivers/video/rockchip/mpp/mpp_rkvdec2_link.h | 23 + 6 files changed, 813 insertions(+), 46 deletions(-) diff --git a/drivers/video/rockchip/mpp/mpp_common.c b/drivers/video/rockchip/mpp/mpp_common.c index 043a5b3b41b3..3717b4fa83eb 100644 --- a/drivers/video/rockchip/mpp/mpp_common.c +++ b/drivers/video/rockchip/mpp/mpp_common.c @@ -2131,15 +2131,9 @@ int mpp_dev_probe(struct mpp_dev *mpp, return -ENODEV; } - if (mpp->task_capacity == 1) { - /* power domain autosuspend delay 2s */ - pm_runtime_set_autosuspend_delay(dev, 2000); - pm_runtime_use_autosuspend(dev); - } else { - dev_info(dev, "link mode task capacity %d\n", - mpp->task_capacity); - /* do not setup autosuspend on multi task device */ - } + /* power domain autosuspend delay 2s */ + pm_runtime_set_autosuspend_delay(dev, 2000); + pm_runtime_use_autosuspend(dev); kthread_init_work(&mpp->work, 
mpp_task_worker_default); @@ -2150,7 +2144,6 @@ int mpp_dev_probe(struct mpp_dev *mpp, device_init_wakeup(dev, true); pm_runtime_enable(dev); - mpp->irq = platform_get_irq(pdev, 0); if (mpp->irq < 0) { dev_err(dev, "No interrupt resource found\n"); @@ -2177,6 +2170,7 @@ int mpp_dev_probe(struct mpp_dev *mpp, ret = -ENOMEM; goto failed; } + mpp->io_base = res->start; /* * TODO: here or at the device itself, some device does not diff --git a/drivers/video/rockchip/mpp/mpp_common.h b/drivers/video/rockchip/mpp/mpp_common.h index f204d35a17dd..a4082ac4a4a8 100644 --- a/drivers/video/rockchip/mpp/mpp_common.h +++ b/drivers/video/rockchip/mpp/mpp_common.h @@ -348,6 +348,7 @@ struct mpp_dev { struct mpp_iommu_info *iommu_info; int (*fault_handler)(struct iommu_domain *iommu, struct device *iommu_dev, unsigned long iova, int status, void *arg); + resource_size_t io_base; atomic_t reset_request; atomic_t session_index; @@ -543,6 +544,7 @@ struct mpp_taskqueue { u32 core_id_max; u32 core_count; unsigned long dev_active_flags; + u32 iommu_fault; }; struct mpp_reset_group { diff --git a/drivers/video/rockchip/mpp/mpp_rkvdec2.c b/drivers/video/rockchip/mpp/mpp_rkvdec2.c index 780e2a364030..990561f1c0d5 100644 --- a/drivers/video/rockchip/mpp/mpp_rkvdec2.c +++ b/drivers/video/rockchip/mpp/mpp_rkvdec2.c @@ -307,7 +307,7 @@ void *rkvdec2_alloc_task(struct mpp_session *session, } static void *rkvdec2_rk3568_alloc_task(struct mpp_session *session, - struct mpp_task_msgs *msgs) + struct mpp_task_msgs *msgs) { u32 fmt; struct mpp_task *mpp_task = NULL; @@ -1349,15 +1349,26 @@ static int rkvdec2_ccu_probe(struct platform_device *pdev) struct rkvdec2_ccu *ccu; struct resource *res; struct device *dev = &pdev->dev; + u32 ccu_mode; ccu = devm_kzalloc(dev, sizeof(*ccu), GFP_KERNEL); if (!ccu) return -ENOMEM; ccu->dev = dev; + /* use task-level soft ccu default */ + ccu->ccu_mode = RKVDEC2_CCU_TASK_SOFT; atomic_set(&ccu->power_enabled, 0); + INIT_LIST_HEAD(&ccu->unused_list); + 
INIT_LIST_HEAD(&ccu->used_list); platform_set_drvdata(pdev, ccu); + if (!of_property_read_u32(dev->of_node, "rockchip,ccu-mode", &ccu_mode)) { + if (ccu_mode <= RKVDEC2_CCU_MODE_NULL || ccu_mode >= RKVDEC2_CCU_MODE_BUTT) + ccu_mode = RKVDEC2_CCU_TASK_SOFT; + ccu->ccu_mode = (enum RKVDEC2_CCU_MODE)ccu_mode; + } + res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "ccu"); if (!res) { dev_err(dev, "no memory resource defined\n"); @@ -1370,12 +1381,6 @@ static int rkvdec2_ccu_probe(struct platform_device *pdev) return -ENODEV; } - device_init_wakeup(dev, true); - pm_runtime_enable(dev); - /* power domain autosuspend delay 2s */ - pm_runtime_set_autosuspend_delay(dev, 2000); - pm_runtime_use_autosuspend(dev); - ccu->aclk_info.clk = devm_clk_get(dev, "aclk_ccu"); if (!ccu->aclk_info.clk) mpp_err("failed on clk_get ccu aclk\n"); @@ -1386,6 +1391,13 @@ static int rkvdec2_ccu_probe(struct platform_device *pdev) else mpp_err("failed on clk_get ccu reset\n"); + /* power domain autosuspend delay 2s */ + pm_runtime_set_autosuspend_delay(dev, 2000); + pm_runtime_use_autosuspend(dev); + device_init_wakeup(dev, true); + pm_runtime_enable(dev); + + dev_info(dev, "ccu-mode: %d\n", ccu->ccu_mode); return 0; } @@ -1482,6 +1494,22 @@ static int rkvdec2_alloc_rcbbuf(struct platform_device *pdev, struct rkvdec2_dev if (!ret && dec->rcb_min_width) dev_info(dev, "min_width %u\n", dec->rcb_min_width); + /* if have, read rcb_info */ + dec->rcb_info_count = device_property_count_u32(dev, "rockchip,rcb-info"); + if (dec->rcb_info_count > 0 && + dec->rcb_info_count <= (sizeof(dec->rcb_infos) / sizeof(u32))) { + int i; + + ret = device_property_read_u32_array(dev, "rockchip,rcb-info", + dec->rcb_infos, dec->rcb_info_count); + if (!ret) { + dev_info(dev, "rcb_info_count %u\n", dec->rcb_info_count); + for (i = 0; i < dec->rcb_info_count; i += 2) + dev_info(dev, "[%u, %u]\n", + dec->rcb_infos[i], dec->rcb_infos[i+1]); + } + } + return 0; err_sram_map: @@ -1496,6 +1524,7 @@ static int 
rkvdec2_core_probe(struct platform_device *pdev) struct rkvdec2_dev *dec; struct mpp_dev *mpp; struct device *dev = &pdev->dev; + irq_handler_t irq_proc = NULL; dec = devm_kzalloc(dev, sizeof(*dec), GFP_KERNEL); if (!dec) @@ -1519,29 +1548,44 @@ static int rkvdec2_core_probe(struct platform_device *pdev) dev_err(dev, "probe sub driver failed\n"); return ret; } + dec->mmu_base = ioremap(dec->mpp.io_base + 0x600, 0x80); + if (!dec->mmu_base) + dev_err(dev, "mmu base map failed!\n"); + /* attach core to ccu */ ret = rkvdec2_attach_ccu(dev, dec); if (ret) { dev_err(dev, "attach ccu failed\n"); return ret; } - /* power domain autosuspend delay 2s */ - pm_runtime_set_autosuspend_delay(dev, 2000); - pm_runtime_use_autosuspend(dev); /* alloc rcb buffer */ rkvdec2_alloc_rcbbuf(pdev, dec); /* set device for link */ - rkvdec2_ccu_link_init(pdev, dec); + ret = rkvdec2_ccu_link_init(pdev, dec); + if (ret) + return ret; mpp->dev_ops->alloc_task = rkvdec2_ccu_alloc_task; - mpp->dev_ops->task_worker = rkvdec2_soft_ccu_worker; - kthread_init_work(&mpp->work, rkvdec2_soft_ccu_worker); + if (dec->ccu->ccu_mode == RKVDEC2_CCU_TASK_SOFT) { + mpp->dev_ops->task_worker = rkvdec2_soft_ccu_worker; + irq_proc = rkvdec2_soft_ccu_irq; + } else if (dec->ccu->ccu_mode == RKVDEC2_CCU_TASK_HARD) { + if (mpp->core_id == 0 && mpp->task_capacity > 1) { + dec->link_dec->task_capacity = mpp->task_capacity; + ret = rkvdec2_ccu_alloc_table(dec, dec->link_dec); + if (ret) + return ret; + } + mpp->dev_ops->task_worker = rkvdec2_hard_ccu_worker; + irq_proc = rkvdec2_hard_ccu_irq; + } + mpp->iommu_info->hdl = rkvdec2_ccu_iommu_fault_handle; + kthread_init_work(&mpp->work, mpp->dev_ops->task_worker); - mpp->fault_handler = rkvdec2_ccu_iommu_fault_handle; /* get irq request */ - ret = devm_request_threaded_irq(dev, mpp->irq, rkvdec2_soft_ccu_irq, NULL, + ret = devm_request_threaded_irq(dev, mpp->irq, irq_proc, NULL, IRQF_SHARED, dev_name(dev), mpp); if (ret) { dev_err(dev, "register interrupter runtime 
failed\n"); @@ -1667,7 +1711,10 @@ static int rkvdec2_remove(struct platform_device *pdev) struct rkvdec2_dev *dec = to_rkvdec2_dev(mpp); dev_info(dev, "remove device\n"); - + if (dec->mmu_base) { + iounmap(dec->mmu_base); + dec->mmu_base = NULL; + } rkvdec2_free_rcbbuf(pdev, dec); mpp_dev_remove(mpp); rkvdec2_procfs_remove(mpp); diff --git a/drivers/video/rockchip/mpp/mpp_rkvdec2.h b/drivers/video/rockchip/mpp/mpp_rkvdec2.h index be89535e6217..cc6bf19f355f 100644 --- a/drivers/video/rockchip/mpp/mpp_rkvdec2.h +++ b/drivers/video/rockchip/mpp/mpp_rkvdec2.h @@ -209,6 +209,8 @@ struct rkvdec2_dev { dma_addr_t rcb_iova; struct page *rcb_page; u32 rcb_min_width; + u32 rcb_info_count; + u32 rcb_infos[RKVDEC_MAX_RCB_NUM * 2]; /* for link mode */ struct rkvdec_link_dev *link_dec; @@ -218,6 +220,14 @@ struct rkvdec2_dev { struct rkvdec2_ccu *ccu; u32 core_mask; u32 task_index; + /* mmu info */ + void __iomem *mmu_base; + u32 fault_iova; + u32 mmu_fault; + u32 mmu0_st; + u32 mmu1_st; + u32 mmu0_pta; + u32 mmu1_pta; }; int mpp_set_rcbbuf(struct mpp_dev *mpp, struct mpp_session *session, diff --git a/drivers/video/rockchip/mpp/mpp_rkvdec2_link.c b/drivers/video/rockchip/mpp/mpp_rkvdec2_link.c index 41a4d9089aeb..77ce1401fde4 100644 --- a/drivers/video/rockchip/mpp/mpp_rkvdec2_link.c +++ b/drivers/video/rockchip/mpp/mpp_rkvdec2_link.c @@ -1567,7 +1567,6 @@ int rkvdec2_attach_ccu(struct device *dev, struct rkvdec2_dev *dec) struct device_node *np; struct platform_device *pdev; struct rkvdec2_ccu *ccu; - struct mpp_taskqueue *queue; mpp_debug_enter(); @@ -1590,10 +1589,11 @@ int rkvdec2_attach_ccu(struct device *dev, struct rkvdec2_dev *dec) dev_info(dev, "core_mask=%08x\n", dec->core_mask); /* if not the main-core, then attach the main core domain to current */ - queue = dec->mpp.queue; - if (&dec->mpp != queue->cores[0]) { + if (dec->mpp.core_id != 0) { + struct mpp_taskqueue *queue; struct mpp_iommu_info *ccu_info, *cur_info; + queue = dec->mpp.queue; /* set the ccu-domain 
for current device */ ccu_info = queue->cores[0]->iommu_info; cur_info = dec->mpp.iommu_info; @@ -1609,10 +1609,9 @@ int rkvdec2_attach_ccu(struct device *dev, struct rkvdec2_dev *dec) return 0; } -static void rkvdec2_ccu_link_timeout_work(struct work_struct *work_s) +static void rkvdec2_ccu_timeout_work(struct work_struct *work_s) { struct mpp_dev *mpp; - struct mpp_session *session; struct mpp_task *task = container_of(to_delayed_work(work_s), struct mpp_task, timeout_work); @@ -1626,15 +1625,8 @@ static void rkvdec2_ccu_link_timeout_work(struct work_struct *work_s) mpp_err("task %d session is null.\n", task->task_id); return; } - session = task->session; - - if (!session->mpp) { - mpp_err("task %d:%d mpp is null.\n", session->index, - task->task_id); - return; - } - mpp = task->mpp ? task->mpp : session->mpp; - mpp_err("task timeout\n"); + mpp = mpp_get_task_used_device(task, task->session); + mpp_err("%s, task timeout\n", dev_name(mpp->dev)); set_bit(TASK_STATE_TIMEOUT, &task->state); atomic_inc(&mpp->reset_request); atomic_inc(&mpp->queue->reset_request); @@ -1874,19 +1866,62 @@ void *rkvdec2_ccu_alloc_task(struct mpp_session *session, return &task->mpp_task; } +static void rkvdec2_ccu_check_pagefault_info(struct mpp_dev *mpp) +{ + u32 i = 0; + + for (i = 0; i < mpp->queue->core_count; i++) { + struct mpp_dev *core = mpp->queue->cores[i]; + struct rkvdec2_dev *dec = to_rkvdec2_dev(core); + void __iomem *mmu_base = dec->mmu_base; + u32 mmu0_st; + u32 mmu1_st; + u32 mmu0_pta; + u32 mmu1_pta; + + if (!mmu_base) + continue; /* no mmu mapping for this core; still check the others */ + + #define FAULT_STATUS 0x7e2 + rkvdec2_ccu_power_on(mpp->queue, dec->ccu); + + mmu0_st = readl(mmu_base + 0x4); + mmu1_st = readl(mmu_base + 0x44); + mmu0_pta = readl(mmu_base + 0xc); + mmu1_pta = readl(mmu_base + 0x4c); + + dec->mmu0_st = mmu0_st; + dec->mmu1_st = mmu1_st; + dec->mmu0_pta = mmu0_pta; + dec->mmu1_pta = mmu1_pta; + + pr_err("core %d mmu0 %08x %08x mm1 %08x %08x\n", + core->core_id, mmu0_st, mmu0_pta, mmu1_st, mmu1_pta); + if
((mmu0_st & FAULT_STATUS) || (mmu1_st & FAULT_STATUS) || + mmu0_pta || mmu1_pta) { + dec->fault_iova = readl(dec->link_dec->reg_base + 0x4); + dec->mmu_fault = 1; + pr_err("core %d fault iova %08x\n", core->core_id, dec->fault_iova); + rockchip_iommu_mask_irq(core->dev); + } else { + dec->mmu_fault = 0; + dec->fault_iova = 0; + } + } +} + int rkvdec2_ccu_iommu_fault_handle(struct iommu_domain *iommu, struct device *iommu_dev, unsigned long iova, int status, void *arg) { - u32 i = 0; struct mpp_dev *mpp = (struct mpp_dev *)arg; mpp_debug_enter(); - atomic_inc(&mpp->queue->reset_request); - for (i = 0; i < mpp->queue->core_count; i++) - rockchip_iommu_mask_irq(mpp->queue->cores[i]->dev); + rkvdec2_ccu_check_pagefault_info(mpp); + mpp->queue->iommu_fault = 1; + atomic_inc(&mpp->queue->reset_request); kthread_queue_work(&mpp->queue->worker, &mpp->work); mpp_debug_leave(); @@ -2041,6 +2076,36 @@ static bool rkvdec2_core_working(struct mpp_taskqueue *queue) return flag; } +static int rkvdec2_ccu_link_session_detach(struct mpp_dev *mpp, + struct mpp_taskqueue *queue) +{ + mutex_lock(&queue->session_lock); + while (atomic_read(&queue->detach_count)) { + struct mpp_session *session = NULL; + + session = list_first_entry_or_null(&queue->session_detach, + struct mpp_session, + session_link); + if (session) { + list_del_init(&session->session_link); + atomic_dec(&queue->detach_count); + } + + mutex_unlock(&queue->session_lock); + + if (session) { + mpp_dbg_session("%s detach count %d\n", dev_name(mpp->dev), + atomic_read(&queue->detach_count)); + mpp_session_deinit(session); + } + + mutex_lock(&queue->session_lock); + } + mutex_unlock(&queue->session_lock); + + return 0; +} + void rkvdec2_soft_ccu_worker(struct kthread_work *work_s) { struct mpp_task *mpp_task; @@ -2108,8 +2173,9 @@ get_task: mpp_debug(DEBUG_TASK_INFO, "pid %d, start hw %s\n", mpp_task->session->pid, dev_name(mpp->dev)); - INIT_DELAYED_WORK(&mpp_task->timeout_work, rkvdec2_ccu_link_timeout_work); - + 
set_bit(TASK_STATE_START, &mpp_task->state); + INIT_DELAYED_WORK(&mpp_task->timeout_work, rkvdec2_ccu_timeout_work); + schedule_delayed_work(&mpp_task->timeout_work, msecs_to_jiffies(WORK_TIMEOUT_MS)); rkvdec2_ccu_power_on(queue, dec->ccu); rkvdec2_soft_ccu_enqueue(mpp, mpp_task); @@ -2118,6 +2184,631 @@ done: list_empty(&queue->pending_list)) rkvdec2_ccu_power_off(queue, dec->ccu); out: + /* session detach out of queue */ + rkvdec2_ccu_link_session_detach(mpp, queue); + + mpp_debug_leave(); +} + +int rkvdec2_ccu_alloc_table(struct rkvdec2_dev *dec, + struct rkvdec_link_dev *link_dec) +{ + int ret, i; + struct mpp_dma_buffer *table; + struct mpp_dev *mpp = &dec->mpp; + + mpp_debug_enter(); + + /* alloc table pointer array */ + table = devm_kmalloc_array(mpp->dev, mpp->task_capacity, + sizeof(*table), GFP_KERNEL | __GFP_ZERO); + if (!table) + return -ENOMEM; + + /* alloc table buffer */ + ret = rkvdec2_link_alloc_table(mpp, link_dec); + if (ret) + return ret; + + /* init table array */ + dec->ccu->table_array = table; + for (i = 0; i < mpp->task_capacity; i++) { + table[i].iova = link_dec->table->iova + i * link_dec->link_node_size; + table[i].vaddr = link_dec->table->vaddr + i * link_dec->link_node_size; + table[i].size = link_dec->link_node_size; + INIT_LIST_HEAD(&table[i].link); + list_add_tail(&table[i].link, &dec->ccu->unused_list); + } + + return 0; +} + +static void rkvdec2_dump_ccu(struct rkvdec2_ccu *ccu) +{ + u32 i; + + for (i = 0; i < 10; i++) + mpp_err("ccu:reg[%d]=%08x\n", i, readl(ccu->reg_base + 4 * i)); + + for (i = 16; i < 22; i++) + mpp_err("ccu:reg[%d]=%08x\n", i, readl(ccu->reg_base + 4 * i)); +} + +static void rkvdec2_dump_link(struct rkvdec2_dev *dec) +{ + u32 i; + + for (i = 0; i < 10; i++) + mpp_err("link:reg[%d]=%08x\n", i, readl(dec->link_dec->reg_base + 4 * i)); +} + +static void rkvdec2_dump_core(struct mpp_dev *mpp, struct rkvdec2_task *task) +{ + u32 j; + + if (task) { + for (j = 0; j < 273; j++) + mpp_err("reg[%d]=%08x, %08x\n", j, 
mpp_read(mpp, j*4), task->reg[j]); + } else { + for (j = 0; j < 273; j++) + mpp_err("reg[%d]=%08x\n", j, mpp_read(mpp, j*4)); + } +} + +irqreturn_t rkvdec2_hard_ccu_irq(int irq, void *param) +{ + u32 irq_status; + struct mpp_dev *mpp = param; + struct rkvdec2_dev *dec = to_rkvdec2_dev(mpp); + + irq_status = readl(dec->link_dec->reg_base + RKVDEC_LINK_IRQ_BASE); + dec->ccu->ccu_core_work_mode = readl(dec->ccu->reg_base + RKVDEC_CCU_CORE_WORK_BASE); + if (irq_status & RKVDEC_LINK_BIT_IRQ_RAW) { + dec->link_dec->irq_status = irq_status; + mpp->irq_status = mpp_read(mpp, RKVDEC_REG_INT_EN); + mpp_debug(DEBUG_IRQ_STATUS, "core %d link_irq=%08x, core_irq=%08x\n", + mpp->core_id, irq_status, mpp->irq_status); + + writel(irq_status & 0xfffff0ff, + dec->link_dec->reg_base + RKVDEC_LINK_IRQ_BASE); + + kthread_queue_work(&mpp->queue->worker, &mpp->work); + return IRQ_HANDLED; + } + + return IRQ_NONE; +} + +static int rkvdec2_hard_ccu_finish(struct rkvdec_link_info *hw, struct rkvdec2_task *task) +{ + u32 i, off, s, n; + struct rkvdec_link_part *part = hw->part_r; + u32 *tb_reg = (u32 *)task->table->vaddr; + + mpp_debug_enter(); + + for (i = 0; i < hw->part_r_num; i++) { + off = part[i].tb_reg_off; + s = part[i].reg_start; + n = part[i].reg_num; + memcpy(&task->reg[s], &tb_reg[off], n * sizeof(u32)); + } + /* revert hack for irq status */ + task->reg[RKVDEC_REG_INT_EN_INDEX] = task->irq_status; + + mpp_debug_leave(); + + return 0; +} + +static int rkvdec2_hard_ccu_dequeue(struct mpp_taskqueue *queue, + struct rkvdec2_ccu *ccu, + struct rkvdec_link_info *hw) +{ + struct mpp_task *mpp_task = NULL, *n; + u32 dump_reg = 0; + u32 dequeue_none = 0; + + mpp_debug_enter(); + list_for_each_entry_safe(mpp_task, n, &queue->running_list, queue_link) { + u32 timeout_flag = test_bit(TASK_STATE_TIMEOUT, &mpp_task->state); + u32 abort_flag = test_bit(TASK_STATE_ABORT, &mpp_task->state); + struct rkvdec2_task *task = to_rkvdec2_task(mpp_task); + u32 *tb_reg = (u32 *)task->table->vaddr; + u32 
irq_status = tb_reg[hw->tb_reg_int]; + u32 ccu_decoded_num, ccu_total_dec_num; + + ccu_decoded_num = readl(ccu->reg_base + RKVDEC_CCU_DEC_NUM_BASE); + ccu_total_dec_num = readl(ccu->reg_base + RKVDEC_CCU_TOTAL_NUM_BASE); + mpp_debug(DEBUG_IRQ_CHECK, + "session %d task %d w:h[%d %d] err %d irq_status %08x timeout=%u abort=%u iova %08x next %08x ccu[%d %d]\n", + mpp_task->session->index, mpp_task->task_index, task->width, + task->height, !!(irq_status & RKVDEC_INT_ERROR_MASK), irq_status, + timeout_flag, abort_flag, (u32)task->table->iova, + ((u32 *)task->table->vaddr)[hw->tb_reg_next], + ccu_decoded_num, ccu_total_dec_num); + + if (irq_status || timeout_flag || abort_flag) { + set_bit(TASK_STATE_HANDLE, &mpp_task->state); + cancel_delayed_work(&mpp_task->timeout_work); + mpp_time_diff(mpp_task); + task->irq_status = irq_status; + + if (irq_status) + rkvdec2_hard_ccu_finish(hw, task); + + set_bit(TASK_STATE_FINISH, &mpp_task->state); + set_bit(TASK_STATE_DONE, &mpp_task->state); + + if (timeout_flag && !dump_reg && mpp_debug_unlikely(DEBUG_DUMP_ERR_REG)) { + u32 i; + + mpp_err("###### ccu #####\n"); + rkvdec2_dump_ccu(ccu); + for (i = 0; i < queue->core_count; i++) { + mpp_err("###### core %d #####\n", i); + rkvdec2_dump_link(to_rkvdec2_dev(queue->cores[i])); + rkvdec2_dump_core(queue->cores[i], task); + } + dump_reg = 1; + } + list_move_tail(&task->table->link, &ccu->unused_list); + /* free task */ + list_del_init(&mpp_task->queue_link); + /* Wake up the GET thread */ + wake_up(&mpp_task->wait); + if ((irq_status & RKVDEC_INT_ERROR_MASK) || timeout_flag) { + pr_err("session %d task %d irq_status %08x timeout=%u abort=%u\n", + mpp_task->session->index, mpp_task->task_index, + irq_status, timeout_flag, abort_flag); + atomic_inc(&queue->reset_request); + } + + kref_put(&mpp_task->ref, mpp_free_task); + } else { + dequeue_none++; + /* + * there are only 2 cores, + * if dequeue not finish task more than 2, + * means the others task still not get run by hw, can break 
early. + */ + if (dequeue_none > 2) + break; + } + } + + mpp_debug_leave(); + return 0; +} + +static int rkvdec2_hard_ccu_reset(struct mpp_taskqueue *queue, struct rkvdec2_ccu *ccu) +{ + int i = 0; + + mpp_debug_enter(); + + /* reset and active core */ + for (i = 0; i < queue->core_count; i++) { + u32 val = 0; + struct mpp_dev *mpp = queue->cores[i]; + struct rkvdec2_dev *dec = to_rkvdec2_dev(mpp); + + if (mpp->disable) + continue; + dev_info(mpp->dev, "resetting...\n"); + disable_hardirq(mpp->irq); + /* force idle */ + writel(dec->core_mask, ccu->reg_base + RKVDEC_CCU_CORE_IDLE_BASE); + writel(0, ccu->reg_base + RKVDEC_CCU_WORK_BASE); + + { + /* soft reset */ + u32 val; + + mpp_write(mpp, RKVDEC_REG_IMPORTANT_BASE, RKVDEC_SOFTREST_EN); + udelay(5); + val = mpp_read(mpp, RKVDEC_REG_INT_EN); + if (!(val & RKVDEC_SOFT_RESET_READY)) + mpp_err("soft reset fail, int %08x\n", val); + + // /* cru reset */ + // dev_info(mpp->dev, "cru reset\n"); + // rkvdec2_reset(mpp); + } +#if IS_ENABLED(CONFIG_ROCKCHIP_SIP) + rockchip_dmcfreq_lock(); + sip_smc_vpu_reset(i, 0, 0); + rockchip_dmcfreq_unlock(); +#else + rkvdec2_reset(mpp); +#endif + mpp_iommu_refresh(mpp->iommu_info, mpp->dev); + enable_irq(mpp->irq); + atomic_set(&mpp->reset_request, 0); + val = mpp_read_relaxed(mpp, 272*4); + dev_info(mpp->dev, "reset done, idle %d\n", (val & 1)); + } + /* reset ccu */ + mpp_safe_reset(ccu->rst_a); + udelay(5); + mpp_safe_unreset(ccu->rst_a); + + mpp_debug_leave(); + return 0; +} + +#define RKVDEC2_1080P_PIXELS (1920*1080) +#define RKVDEC2_4K_PIXELS (4096*2304) +#define RKVDEC2_8K_PIXELS (7680*4320) +#define RKVDEC2_TIMEOUT_20MS (0xefffff) +#define RKVDEC2_TIMEOUT_50MS (0x2cfffff) +#define RKVDEC2_TIMEOUT_100MS (0x4ffffff) + +static u32 rkvdec2_get_timeout_threshold(struct rkvdec2_task *task) +{ + u32 pixels = task->pixels; + + if (pixels < RKVDEC2_1080P_PIXELS) + return RKVDEC2_TIMEOUT_20MS; + else if (pixels < RKVDEC2_4K_PIXELS) + return RKVDEC2_TIMEOUT_50MS; + else + return 
RKVDEC2_TIMEOUT_100MS; +} + +static struct mpp_task * +rkvdec2_hard_ccu_prepare(struct mpp_task *mpp_task, + struct rkvdec2_ccu *ccu, struct rkvdec_link_info *hw) +{ + u32 i, off, s, n; + u32 *tb_reg; + struct mpp_dma_buffer *table = NULL; + struct rkvdec_link_part *part; + struct rkvdec2_task *task = to_rkvdec2_task(mpp_task); + + mpp_debug_enter(); + + if (test_bit(TASK_STATE_PREPARE, &mpp_task->state)) + return mpp_task; + + /* ensure that cur table iova points to the next link table*/ + { + struct mpp_dma_buffer *table0 = NULL, *table1 = NULL, *n; + + list_for_each_entry_safe(table, n, &ccu->unused_list, link) { + if (!table0) { + table0 = table; + continue; + } + if (!table1) + table1 = table; + break; + } + if (!table0 || !table1) + return NULL; + ((u32 *)table0->vaddr)[hw->tb_reg_next] = table1->iova; + table = table0; + } + + /* set session idx */ + rkvdec2_set_core_info(task->reg, mpp_task->session->index); + tb_reg = (u32 *)table->vaddr; + part = hw->part_w; + + /* disable multicore pu/colmv offset req timeout reset */ + task->reg[RKVDEC_REG_EN_MODE_SET] |= BIT(1); + task->reg[RKVDEC_REG_TIMEOUT_THRESHOLD] = rkvdec2_get_timeout_threshold(task); + + for (i = 0; i < hw->part_w_num; i++) { + off = part[i].tb_reg_off; + s = part[i].reg_start; + n = part[i].reg_num; + memcpy(&tb_reg[off], &task->reg[s], n * sizeof(u32)); + } + + /* memset read registers */ + part = hw->part_r; + for (i = 0; i < hw->part_r_num; i++) { + off = part[i].tb_reg_off; + n = part[i].reg_num; + memset(&tb_reg[off], 0, n * sizeof(u32)); + } + list_move_tail(&table->link, &ccu->used_list); + task->table = table; + set_bit(TASK_STATE_PREPARE, &mpp_task->state); + mpp_dbg_ccu("session %d task %d iova %08x next %08x\n", + mpp_task->session->index, mpp_task->task_index, (u32)task->table->iova, + ((u32 *)task->table->vaddr)[hw->tb_reg_next]); + + mpp_debug_leave(); + + return mpp_task; +} + +static int rkvdec2_ccu_link_fix_rcb_regs(struct rkvdec2_dev *dec) +{ + int ret = 0; + u32 i, val; + 
u32 reg, reg_idx, rcb_size, rcb_offset; + + if (!dec->rcb_iova && !dec->rcb_info_count) + goto done; + /* check whether fixed */ + val = readl(dec->link_dec->reg_base + RKVDEC_LINK_IRQ_BASE); + if (val & RKVDEC_CCU_BIT_FIX_RCB) + goto done; + /* set registers */ + rcb_offset = 0; + for (i = 0; i < dec->rcb_info_count; i += 2) { + reg_idx = dec->rcb_infos[i]; + rcb_size = dec->rcb_infos[i + 1]; + mpp_debug(DEBUG_SRAM_INFO, + "rcb: reg %u size %u offset %u sram_size %u rcb_size %u\n", + reg_idx, rcb_size, rcb_offset, dec->sram_size, dec->rcb_size); + if ((rcb_offset + rcb_size) > dec->rcb_size) { + mpp_err("rcb: reg[%u] set failed.\n", reg_idx); + ret = -ENOMEM; + goto done; + } + reg = dec->rcb_iova + rcb_offset; + mpp_write(&dec->mpp, reg_idx * sizeof(u32), reg); + rcb_offset += rcb_size; + } + + val |= RKVDEC_CCU_BIT_FIX_RCB; + writel(val, dec->link_dec->reg_base + RKVDEC_LINK_IRQ_BASE); +done: + return ret; +} + +static int rkvdec2_hard_ccu_enqueue(struct rkvdec2_ccu *ccu, + struct mpp_task *mpp_task, + struct mpp_taskqueue *queue, + struct mpp_dev *mpp) +{ + u32 ccu_en, work_mode, link_mode; + struct rkvdec2_task *task = to_rkvdec2_task(mpp_task); + + mpp_debug_enter(); + + if (test_bit(TASK_STATE_START, &mpp_task->state)) + goto done; + + ccu_en = readl(ccu->reg_base + RKVDEC_CCU_WORK_BASE); + mpp_dbg_ccu("ccu_en=%d\n", ccu_en); + if (!ccu_en) { + u32 i; + + /* set work mode, leaving out cores that are disabled */ + work_mode = 0; + for (i = 0; i < queue->core_count; i++) { + u32 val; + struct mpp_dev *core = queue->cores[i]; + struct rkvdec2_dev *dec = to_rkvdec2_dev(core); + + if (core->disable) + continue; + work_mode |= dec->core_mask; + rkvdec2_ccu_link_fix_rcb_regs(dec); + /* control by ccu */ + val = readl(dec->link_dec->reg_base + RKVDEC_LINK_IRQ_BASE); + val |= RKVDEC_LINK_BIT_CCU_WORK_MODE; + writel(val, dec->link_dec->reg_base + RKVDEC_LINK_IRQ_BASE); + } + writel(work_mode, ccu->reg_base + RKVDEC_CCU_CORE_WORK_BASE); + ccu->ccu_core_work_mode = readl(ccu->reg_base + 
RKVDEC_CCU_CORE_WORK_BASE); + mpp_dbg_ccu("ccu_work_mode=%08x, ccu_work_status=%08x\n", + readl(ccu->reg_base + RKVDEC_CCU_CORE_WORK_BASE), + readl(ccu->reg_base + RKVDEC_CCU_CORE_STA_BASE)); + + /* set auto gating */ + writel(RKVDEC_CCU_BIT_AUTOGATE, ccu->reg_base + RKVDEC_CCU_CTRL_BASE); + /* link start base */ + writel(task->table->iova, ccu->reg_base + RKVDEC_CCU_CFG_ADDR_BASE); + /* enable link */ + writel(RKVDEC_CCU_BIT_WORK_EN, ccu->reg_base + RKVDEC_CCU_WORK_BASE); + } + + /* set link mode */ + link_mode = ccu_en ? RKVDEC_CCU_BIT_ADD_MODE : 0; + writel(link_mode | RKVDEC_LINK_ADD_CFG_NUM, ccu->reg_base + RKVDEC_CCU_LINK_MODE_BASE); + + /* flush tlb before starting hardware */ + mpp_iommu_flush_tlb(mpp->iommu_info); + /* wmb */ + wmb(); + /* configure done */ + writel(RKVDEC_CCU_BIT_CFG_DONE, ccu->reg_base + RKVDEC_CCU_CFG_DONE_BASE); + + mpp_time_record(mpp_task); + set_bit(TASK_STATE_START, &mpp_task->state); + INIT_DELAYED_WORK(&mpp_task->timeout_work, rkvdec2_ccu_timeout_work); + schedule_delayed_work(&mpp_task->timeout_work, msecs_to_jiffies(WORK_TIMEOUT_MS)); + /* pending to running */ + if (!test_bit(TASK_STATE_RUNNING, &mpp_task->state)) { + mutex_lock(&queue->pending_lock); + set_bit(TASK_STATE_RUNNING, &mpp_task->state); + list_move_tail(&mpp_task->queue_link, &queue->running_list); + mutex_unlock(&queue->pending_lock); + } + mpp_dbg_ccu("session %d task %d iova=%08x task->state=%lx link_mode=%08x\n", + mpp_task->session->index, mpp_task->task_index, + (u32)task->table->iova, mpp_task->state, + readl(ccu->reg_base + RKVDEC_CCU_LINK_MODE_BASE)); +done: + mpp_debug_leave(); + + return 0; +} + +static void rkvdec2_hard_ccu_handle_pagefault_task(struct rkvdec2_dev *dec, + struct mpp_task *mpp_task) +{ + struct rkvdec2_task *task = to_rkvdec2_task(mpp_task); + + mpp_dbg_ccu("session %d task %d w:h[%d %d] pagefault mmu0[%08x %08x] mmu1[%08x %08x] fault_iova %08x\n", + mpp_task->session->index, mpp_task->task_index, + task->width, task->height, 
dec->mmu0_st, dec->mmu0_pta, + dec->mmu1_st, dec->mmu1_pta, dec->fault_iova); + + set_bit(TASK_STATE_HANDLE, &mpp_task->state); + task->irq_status |= BIT(4); + cancel_delayed_work(&mpp_task->timeout_work); + rkvdec2_hard_ccu_finish(dec->link_dec->info, task); + set_bit(TASK_STATE_FINISH, &mpp_task->state); + set_bit(TASK_STATE_DONE, &mpp_task->state); + list_move_tail(&task->table->link, &dec->ccu->unused_list); + list_del_init(&mpp_task->queue_link); + /* Wake up the GET thread */ + wake_up(&mpp_task->wait); + kref_put(&mpp_task->ref, mpp_free_task); + dec->mmu_fault = 0; + dec->fault_iova = 0; +} + +static void rkvdec2_hard_ccu_pagefault_proc(struct mpp_taskqueue *queue) +{ + struct mpp_task *loop = NULL, *n; + + list_for_each_entry_safe(loop, n, &queue->running_list, queue_link) { + struct rkvdec2_task *task = to_rkvdec2_task(loop); + u32 iova = (u32)task->table->iova; + u32 i; + + for (i = 0; i < queue->core_count; i++) { + struct mpp_dev *core = queue->cores[i]; + struct rkvdec2_dev *dec = to_rkvdec2_dev(core); + + if (!dec->mmu_fault || dec->fault_iova != iova) + continue; + rkvdec2_hard_ccu_handle_pagefault_task(dec, loop); + } + } +} + +static void rkvdec2_hard_ccu_resend_tasks(struct mpp_dev *mpp, struct mpp_taskqueue *queue) +{ + struct rkvdec2_task *task_pre = NULL; + struct mpp_task *loop = NULL, *n; + struct rkvdec2_dev *dec = to_rkvdec2_dev(mpp); + + /* re sort running list */ + list_for_each_entry_safe(loop, n, &queue->running_list, queue_link) { + struct rkvdec2_task *task = to_rkvdec2_task(loop); + u32 *tb_reg = (u32 *)task->table->vaddr; + u32 irq_status = tb_reg[dec->link_dec->info->tb_reg_int]; + + if (!irq_status) { + if (task_pre) { + tb_reg = (u32 *)task_pre->table->vaddr; + tb_reg[dec->link_dec->info->tb_reg_next] = task->table->iova; + } + task_pre = task; + } + } + + if (task_pre) { + struct mpp_dma_buffer *tbl; + u32 *tb_reg; + + tbl = list_first_entry_or_null(&dec->ccu->unused_list, + struct mpp_dma_buffer, link); + WARN_ON(!tbl); + if 
(tbl) { + tb_reg = (u32 *)task_pre->table->vaddr; + tb_reg[dec->link_dec->info->tb_reg_next] = tbl->iova; + } + } + + /* resend */ + list_for_each_entry_safe(loop, n, &queue->running_list, queue_link) { + struct rkvdec2_task *task = to_rkvdec2_task(loop); + u32 *tb_reg = (u32 *)task->table->vaddr; + u32 irq_status = tb_reg[dec->link_dec->info->tb_reg_int]; + + mpp_dbg_ccu("reback: session %d task %d iova %08x next %08x irq_status 0x%08x\n", + loop->session->index, loop->task_index, (u32)task->table->iova, + tb_reg[dec->link_dec->info->tb_reg_next], irq_status); + + if (!irq_status) { + cancel_delayed_work(&loop->timeout_work); + clear_bit(TASK_STATE_START, &loop->state); + rkvdec2_hard_ccu_enqueue(dec->ccu, loop, queue, mpp); + } + } +} + +void rkvdec2_hard_ccu_worker(struct kthread_work *work_s) +{ + struct mpp_task *mpp_task; + struct mpp_dev *mpp = container_of(work_s, struct mpp_dev, work); + struct mpp_taskqueue *queue = mpp->queue; + struct rkvdec2_dev *dec = to_rkvdec2_dev(mpp); + + mpp_debug_enter(); + + /* process all finished task in running list */ + rkvdec2_hard_ccu_dequeue(queue, dec->ccu, dec->link_dec->info); + + /* process reset request */ + if (atomic_read(&queue->reset_request) && + (list_empty(&queue->running_list) || !dec->ccu->ccu_core_work_mode)) { + /* + * cancel running list timeout work to avoid + * sw timeout causeby reset long time + */ + struct mpp_task *loop = NULL, *n; + + list_for_each_entry_safe(loop, n, &queue->running_list, queue_link) { + cancel_delayed_work(&loop->timeout_work); + } + /* reset process */ + rkvdec2_hard_ccu_reset(queue, dec->ccu); + atomic_set(&queue->reset_request, 0); + /* if iommu pagefault, find the fault task and drop it */ + if (queue->iommu_fault) { + rkvdec2_hard_ccu_pagefault_proc(queue); + queue->iommu_fault = 0; + } + + /* relink running task iova in list, and resend them to hw */ + if (!list_empty(&queue->running_list)) + rkvdec2_hard_ccu_resend_tasks(mpp, queue); + } +get_task: + /* get one task form 
pending list */ + mutex_lock(&queue->pending_lock); + mpp_task = list_first_entry_or_null(&queue->pending_list, + struct mpp_task, queue_link); + mutex_unlock(&queue->pending_lock); + + if (!mpp_task) + goto done; + if (test_bit(TASK_STATE_ABORT, &mpp_task->state)) { + mutex_lock(&queue->pending_lock); + list_del_init(&mpp_task->queue_link); + mutex_unlock(&queue->pending_lock); + kref_put(&mpp_task->ref, mpp_free_task); + goto get_task; + } + + if (atomic_read(&queue->reset_request)) + mpp_task = NULL; + else + mpp_task = rkvdec2_hard_ccu_prepare(mpp_task, dec->ccu, dec->link_dec->info); + + if (!mpp_task) + goto done; + + rkvdec2_ccu_power_on(queue, dec->ccu); + rkvdec2_hard_ccu_enqueue(dec->ccu, mpp_task, queue, mpp); +done: + mutex_lock(&queue->pending_lock); + if (list_empty(&queue->running_list) && + list_empty(&queue->pending_list)) + rkvdec2_ccu_power_off(queue, dec->ccu); + mutex_unlock(&queue->pending_lock); + /* session detach out of queue */ mpp_session_cleanup_detach(queue, work_s); diff --git a/drivers/video/rockchip/mpp/mpp_rkvdec2_link.h b/drivers/video/rockchip/mpp/mpp_rkvdec2_link.h index 270cd46a71e7..053bc6ae86bb 100644 --- a/drivers/video/rockchip/mpp/mpp_rkvdec2_link.h +++ b/drivers/video/rockchip/mpp/mpp_rkvdec2_link.h @@ -18,10 +18,14 @@ #define RKVDEC_REG_SECOND_EN_INDEX 12 #define RKVDEC_WAIT_RESET_EN BIT(7) +#define RKVDEC_REG_EN_MODE_SET 13 + #define RKVDEC_REG_DEBUG_INT_BASE 0x440 #define RKVDEC_REG_DEBUG_INT_INDEX 272 #define RKVDEC_BIT_BUS_IDLE BIT(0) +#define RKVDEC_REG_TIMEOUT_THRESHOLD 32 + /* define for link hardware */ #define RKVDEC_LINK_ADD_CFG_NUM 1 @@ -181,6 +185,13 @@ struct rkvdec_link_dev { u32 stuff_cnt; }; +enum RKVDEC2_CCU_MODE { + RKVDEC2_CCU_MODE_NULL = 0, + RKVDEC2_CCU_TASK_SOFT = 1, + RKVDEC2_CCU_TASK_HARD = 2, + RKVDEC2_CCU_MODE_BUTT, +}; + struct rkvdec2_ccu { struct device *dev; /* register base */ @@ -192,6 +203,13 @@ struct rkvdec2_ccu { struct proc_dir_entry *procfs; #endif struct reset_control *rst_a; + enum 
RKVDEC2_CCU_MODE ccu_mode; + u32 ccu_core_work_mode; + + struct mpp_dma_buffer *table_array; + struct list_head unused_list; + struct list_head used_list; + u32 timeout_flag; }; extern struct rkvdec_link_info rkvdec_link_rk356x_hw_info; @@ -222,4 +240,9 @@ int rkvdec2_ccu_iommu_fault_handle(struct iommu_domain *iommu, irqreturn_t rkvdec2_soft_ccu_irq(int irq, void *param); void rkvdec2_soft_ccu_worker(struct kthread_work *work_s); +int rkvdec2_ccu_alloc_table(struct rkvdec2_dev *dec, + struct rkvdec_link_dev *link_dec); +irqreturn_t rkvdec2_hard_ccu_irq(int irq, void *param); +void rkvdec2_hard_ccu_worker(struct kthread_work *work_s); + #endif From d5f441d9a63d0bca58358adc1d598db379286fc7 Mon Sep 17 00:00:00 2001 From: Algea Cao Date: Fri, 17 Mar 2023 17:59:24 +0800 Subject: [PATCH 48/79] drm: bridge: dw-hdmi: Fix some TV display error when play hdr video in 4K25Hz Signed-off-by: Algea Cao Change-Id: I93092d531435c47885e013c02e6655821370256a --- drivers/gpu/drm/bridge/synopsys/dw-hdmi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/bridge/synopsys/dw-hdmi.c b/drivers/gpu/drm/bridge/synopsys/dw-hdmi.c index 990271cae22f..7a9dee297911 100644 --- a/drivers/gpu/drm/bridge/synopsys/dw-hdmi.c +++ b/drivers/gpu/drm/bridge/synopsys/dw-hdmi.c @@ -2344,7 +2344,7 @@ static void hdmi_config_drm_infoframe(struct dw_hdmi *hdmi, * avi and hdr infoframe cannot be sent at the same time * for compatibility with Huawei TV */ - mdelay(50); + msleep(300); hdmi_modb(hdmi, HDMI_FC_PACKET_TX_EN_DRM_ENABLE, HDMI_FC_PACKET_TX_EN_DRM_MASK, HDMI_FC_PACKET_TX_EN); From 1ea14f41e66aa157c86b891a038934ead3b78ca6 Mon Sep 17 00:00:00 2001 From: Sandy Huang Date: Mon, 20 Mar 2023 09:06:05 +0800 Subject: [PATCH 49/79] drm/rockchip: vop3: rk3562: remove unused defined Signed-off-by: Sandy Huang Change-Id: Ibe0b2e3ce9358c8509630aa00167264f675fa7c3 --- drivers/gpu/drm/rockchip/rockchip_vop2_reg.c | 64 -------------------- 1 file changed, 64 deletions(-) diff --git 
a/drivers/gpu/drm/rockchip/rockchip_vop2_reg.c b/drivers/gpu/drm/rockchip/rockchip_vop2_reg.c index d9aae6a98560..fadb52c659c1 100644 --- a/drivers/gpu/drm/rockchip/rockchip_vop2_reg.c +++ b/drivers/gpu/drm/rockchip/rockchip_vop2_reg.c @@ -1029,70 +1029,6 @@ static const struct vop2_video_port_regs rk3562_vop_vp0_regs = { .layer_sel = VOP_REG(RK3528_OVL_PORT0_LAYER_SEL, 0xffff, 0), }; -static const struct vop2_video_port_regs rk3562_vop_vp1_regs = { - .cfg_done = VOP_REG(RK3568_REG_CFG_DONE, 0x1, 1), - .overlay_mode = VOP_REG(RK3528_OVL_PORT1_CTRL, 0x1, 0), - .dsp_background = VOP_REG(RK3568_VP1_DSP_BG, 0xffffffff, 0), - .out_mode = VOP_REG(RK3568_VP1_DSP_CTRL, 0xf, 0), - .core_dclk_div = VOP_REG(RK3568_VP1_DSP_CTRL, 0x1, 4), - .p2i_en = VOP_REG(RK3568_VP1_DSP_CTRL, 0x1, 5), - .dsp_filed_pol = VOP_REG(RK3568_VP1_DSP_CTRL, 0x1, 6), - .dsp_interlace = VOP_REG(RK3568_VP1_DSP_CTRL, 0x1, 7), - .dsp_data_swap = VOP_REG(RK3568_VP1_DSP_CTRL, 0x1f, 8), - .dsp_x_mir_en = VOP_REG(RK3568_VP1_DSP_CTRL, 0x1, 13), - .post_dsp_out_r2y = VOP_REG(RK3568_VP1_DSP_CTRL, 0x1, 15), - .pre_dither_down_en = VOP_REG(RK3568_VP1_DSP_CTRL, 0x1, 16), - .dither_down_en = VOP_REG(RK3568_VP1_DSP_CTRL, 0x1, 17), - .dither_down_sel = VOP_REG(RK3568_VP1_DSP_CTRL, 0x3, 18), - .dither_down_mode = VOP_REG(RK3568_VP1_DSP_CTRL, 0x1, 20), - .gamma_update_en = VOP_REG(RK3568_VP1_DSP_CTRL, 0x1, 22), - .dsp_lut_en = VOP_REG(RK3568_VP1_DSP_CTRL, 0x1, 28), - .standby = VOP_REG(RK3568_VP1_DSP_CTRL, 0x1, 31), - .bg_mix_ctrl = VOP_REG(RK3528_OVL_PORT1_BG_MIX_CTRL, 0xffff, 0), - .bg_dly = VOP_REG(RK3528_OVL_PORT1_BG_MIX_CTRL, 0xff, 24), - .pre_scan_htiming = VOP_REG(RK3568_VP1_PRE_SCAN_HTIMING, 0x1fff1fff, 0), - .hpost_st_end = VOP_REG(RK3568_VP1_POST_DSP_HACT_INFO, 0x1fff1fff, 0), - .vpost_st_end = VOP_REG(RK3568_VP1_POST_DSP_VACT_INFO, 0x1fff1fff, 0), - .post_scl_factor = VOP_REG(RK3568_VP1_POST_SCL_FACTOR_YRGB, 0xffffffff, 0), - .post_scl_ctrl = VOP_REG(RK3568_VP1_POST_SCL_CTRL, 0x3, 0), - .htotal_pw = 
VOP_REG(RK3568_VP1_DSP_HTOTAL_HS_END, 0xffffffff, 0), - .hact_st_end = VOP_REG(RK3568_VP1_DSP_HACT_ST_END, 0xffffffff, 0), - .dsp_vtotal = VOP_REG(RK3568_VP1_DSP_VTOTAL_VS_END, 0x1fff, 16), - .sw_dsp_vtotal_imd = VOP_REG(RK3568_VP1_DSP_VTOTAL_VS_END, 0x1, 15), - .dsp_vs_end = VOP_REG(RK3568_VP1_DSP_VTOTAL_VS_END, 0x1fff, 0), - .vact_st_end = VOP_REG(RK3568_VP1_DSP_VACT_ST_END, 0x1fff1fff, 0), - .vact_st_end_f1 = VOP_REG(RK3568_VP1_DSP_VACT_ST_END_F1, 0x1fff1fff, 0), - .vs_st_end_f1 = VOP_REG(RK3568_VP1_DSP_VS_ST_END_F1, 0x1fff1fff, 0), - .vpost_st_end_f1 = VOP_REG(RK3568_VP1_POST_DSP_VACT_INFO_F1, 0x1fff1fff, 0), - .bcsh_brightness = VOP_REG(RK3568_VP1_BCSH_BCS, 0xff, 0), - .bcsh_contrast = VOP_REG(RK3568_VP1_BCSH_BCS, 0x1ff, 8), - .bcsh_sat_con = VOP_REG(RK3568_VP1_BCSH_BCS, 0x3ff, 20), - .bcsh_out_mode = VOP_REG(RK3568_VP1_BCSH_BCS, 0x3, 30), - .bcsh_sin_hue = VOP_REG(RK3568_VP1_BCSH_H, 0x1ff, 0), - .bcsh_cos_hue = VOP_REG(RK3568_VP1_BCSH_H, 0x1ff, 16), - .bcsh_r2y_csc_mode = VOP_REG(RK3568_VP1_BCSH_CTRL, 0x3, 6), - .bcsh_r2y_en = VOP_REG(RK3568_VP1_BCSH_CTRL, 0x1, 4), - .bcsh_y2r_csc_mode = VOP_REG(RK3568_VP1_BCSH_CTRL, 0x3, 2), - .bcsh_y2r_en = VOP_REG(RK3568_VP1_BCSH_CTRL, 0x1, 0), - .bcsh_en = VOP_REG(RK3568_VP1_BCSH_COLOR_BAR, 0x1, 31), - .edpi_te_en = VOP_REG(RK3568_VP1_DUAL_CHANNEL_CTRL, 0x1, 28), - .edpi_wms_hold_en = VOP_REG(RK3568_VP1_DUAL_CHANNEL_CTRL, 0x1, 30), - .edpi_wms_fs = VOP_REG(RK3568_VP1_DUAL_CHANNEL_CTRL, 0x1, 31), - .lut_dma_rid = VOP_REG(RK3568_SYS_AXI_LUT_CTRL, 0xf, 4), - .mcu_pix_total = VOP_REG(RK3562_VP1_MCU_CTRL, 0x3f, 0), - .mcu_cs_pst = VOP_REG(RK3562_VP1_MCU_CTRL, 0xf, 6), - .mcu_cs_pend = VOP_REG(RK3562_VP1_MCU_CTRL, 0x3f, 10), - .mcu_rw_pst = VOP_REG(RK3562_VP1_MCU_CTRL, 0xf, 16), - .mcu_rw_pend = VOP_REG(RK3562_VP1_MCU_CTRL, 0x3f, 20), - .mcu_hold_mode = VOP_REG(RK3562_VP1_MCU_CTRL, 0x1, 27), - .mcu_frame_st = VOP_REG(RK3562_VP1_MCU_CTRL, 0x1, 28), - .mcu_rs = VOP_REG(RK3562_VP1_MCU_CTRL, 0x1, 29), - .mcu_bypass = 
VOP_REG(RK3562_VP1_MCU_CTRL, 0x1, 30), - .mcu_type = VOP_REG(RK3562_VP1_MCU_CTRL, 0x1, 31), - .mcu_rw_bypass_port = VOP_REG(RK3562_VP1_MCU_RW_BYPASS_PORT, 0xffffffff, 0), - .layer_sel = VOP_REG(RK3528_OVL_PORT1_LAYER_SEL, 0xffff, 0), -}; - static const struct vop2_video_port_data rk3562_vop_video_ports[] = { { .id = 0, From 152a4c49b85314c267225c42e47d3f7b14c293c4 Mon Sep 17 00:00:00 2001 From: Chen Shunqing Date: Fri, 17 Mar 2023 11:49:40 +0000 Subject: [PATCH 50/79] media: rockchip: hdmirx: fix error when cec message length is greater than 16 Change-Id: Iba31625a3178cd6beeca45058dd3cb6c99a896ca Signed-off-by: Chen Shunqing --- drivers/media/platform/rockchip/hdmirx/rk_hdmirx_cec.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/media/platform/rockchip/hdmirx/rk_hdmirx_cec.c b/drivers/media/platform/rockchip/hdmirx/rk_hdmirx_cec.c index 93410232d9a0..70f1ff66d9f7 100644 --- a/drivers/media/platform/rockchip/hdmirx/rk_hdmirx_cec.c +++ b/drivers/media/platform/rockchip/hdmirx/rk_hdmirx_cec.c @@ -68,7 +68,7 @@ static int hdmirx_cec_transmit(struct cec_adapter *adap, u8 attempts, for (i = 0; i < msg_len; i++) data[i / 4] |= msg->msg[i] << (i % 4) * 8; - data_len = msg_len / 4 + 1; + data_len = (msg_len + 3) / 4; for (i = 0; i < data_len; i++) hdmirx_cec_write(cec, CEC_TX_DATA3_0 + i * 4, data[i]); From 41542aacfe19961977e2c6179bba94c7d84ffe49 Mon Sep 17 00:00:00 2001 From: Rimon Xu Date: Fri, 2 Dec 2022 10:13:04 +0800 Subject: [PATCH 51/79] arm64: dts: rockchip: rk3528: Add video tunnel device Signed-off-by: Rimon Xu Change-Id: I2b6e5b09f5fcd4fcbb09fabf3a970515f270532d --- arch/arm64/boot/dts/rockchip/rk3528.dtsi | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/arch/arm64/boot/dts/rockchip/rk3528.dtsi b/arch/arm64/boot/dts/rockchip/rk3528.dtsi index a89f1f139df0..e2296038589a 100644 --- a/arch/arm64/boot/dts/rockchip/rk3528.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3528.dtsi @@ -344,6 +344,11 @@ method = "smc"; }; + rkvtunnel: rkvtunnel 
{ + compatible = "rockchip,video-tunnel"; + status = "disabled"; + }; + rockchip_suspend: rockchip-suspend { compatible = "rockchip,pm-rk3528"; status = "disabled"; From d011989675a5db2b51f350c99b8916a836158adf Mon Sep 17 00:00:00 2001 From: Rimon Xu Date: Fri, 9 Dec 2022 16:05:20 +0800 Subject: [PATCH 52/79] arm64: dts: rockchip: rk3528-evb: enable rkvtunnel node Signed-off-by: Rimon Xu Change-Id: I6c2390645e6a9ea24941a2fab5b5f4910e7d4343 --- arch/arm64/boot/dts/rockchip/rk3528-evb.dtsi | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/arm64/boot/dts/rockchip/rk3528-evb.dtsi b/arch/arm64/boot/dts/rockchip/rk3528-evb.dtsi index 55d88dc6fc27..532b4de62e8a 100644 --- a/arch/arm64/boot/dts/rockchip/rk3528-evb.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3528-evb.dtsi @@ -493,6 +493,10 @@ status = "okay"; }; +&rkvtunnel { + status = "okay"; +}; + &rockchip_suspend { status = "okay"; rockchip,sleep-debug-en = <1>; From e54650456acaa423021fb9af2c320b0e97690dd8 Mon Sep 17 00:00:00 2001 From: Wyon Bi Date: Mon, 20 Mar 2023 08:43:31 +0000 Subject: [PATCH 53/79] mfd: max96745: Reduce i2c operations Fixes: 83465554242e ("mfd: max96745: Support i2c-mux-idle-disconnect property") Signed-off-by: Wyon Bi Change-Id: Ib6cc9b197561bf1326442cebbd9844af3853d06a --- drivers/mfd/max96745.c | 22 +++++++++++++++------- 1 file changed, 15 insertions(+), 7 deletions(-) diff --git a/drivers/mfd/max96745.c b/drivers/mfd/max96745.c index d973f8991f4e..53cf69a5521b 100644 --- a/drivers/mfd/max96745.c +++ b/drivers/mfd/max96745.c @@ -18,6 +18,7 @@ struct max96745 { struct device *dev; struct regmap *regmap; struct i2c_mux_core *muxc; + bool idle_disc; struct gpio_desc *enable_gpio; struct gpio_desc *lock_gpio; }; @@ -63,6 +64,9 @@ static int max96745_select(struct i2c_mux_core *muxc, u32 chan) { struct max96745 *max96745 = dev_get_drvdata(muxc->dev); + if (!max96745->idle_disc) + return 0; + if (chan == 1) regmap_update_bits(max96745->regmap, 0x0086, DIS_REM_CC, FIELD_PREP(DIS_REM_CC, 0)); 
@@ -77,6 +81,9 @@ static int max96745_deselect(struct i2c_mux_core *muxc, u32 chan) { struct max96745 *max96745 = dev_get_drvdata(muxc->dev); + if (!max96745->idle_disc) + return 0; + if (chan == 1) regmap_update_bits(max96745->regmap, 0x0086, DIS_REM_CC, FIELD_PREP(DIS_REM_CC, 1)); @@ -109,10 +116,12 @@ static void max96745_power_on(struct max96745 *max96745) msleep(200); } - regmap_update_bits(max96745->regmap, 0x0076, DIS_REM_CC, - FIELD_PREP(DIS_REM_CC, 1)); - regmap_update_bits(max96745->regmap, 0x0086, DIS_REM_CC, - FIELD_PREP(DIS_REM_CC, 1)); + if (max96745->idle_disc) { + regmap_update_bits(max96745->regmap, 0x0076, DIS_REM_CC, + FIELD_PREP(DIS_REM_CC, 1)); + regmap_update_bits(max96745->regmap, 0x0086, DIS_REM_CC, + FIELD_PREP(DIS_REM_CC, 1)); + } } static ssize_t line_fault_monitor_show(struct device *device, @@ -198,7 +207,6 @@ static int max96745_i2c_probe(struct i2c_client *client) struct device_node *child; struct max96745 *max96745; unsigned int nr = 0; - bool idle_disc; int ret; for_each_available_child_of_node(dev->of_node, child) { @@ -212,11 +220,11 @@ static int max96745_i2c_probe(struct i2c_client *client) if (!max96745) return -ENOMEM; - idle_disc = device_property_read_bool(dev, "i2c-mux-idle-disconnect"); + max96745->idle_disc = device_property_read_bool(dev, "i2c-mux-idle-disconnect"); max96745->muxc = i2c_mux_alloc(client->adapter, dev, nr, 0, I2C_MUX_LOCKED, max96745_select, - idle_disc ? 
max96745_deselect : NULL); + max96745_deselect); if (!max96745->muxc) return -ENOMEM; From 86250dffd64828bdf3f2de49b79e7f62d85373d6 Mon Sep 17 00:00:00 2001 From: Wyon Bi Date: Mon, 20 Mar 2023 08:48:42 +0000 Subject: [PATCH 54/79] mfd: max96745: Set I2C speed to Fast-mode Signed-off-by: Wyon Bi Change-Id: Ide9286e46ec3b1829a6c0346c7780f355d7e2281 --- drivers/mfd/max96745.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/mfd/max96745.c b/drivers/mfd/max96745.c index 53cf69a5521b..bd93d2db71a7 100644 --- a/drivers/mfd/max96745.c +++ b/drivers/mfd/max96745.c @@ -116,6 +116,9 @@ static void max96745_power_on(struct max96745 *max96745) msleep(200); } + /* Set for I2C Fast-mode speed */ + regmap_write(max96745->regmap, 0x0070, 0x16); + if (max96745->idle_disc) { regmap_update_bits(max96745->regmap, 0x0076, DIS_REM_CC, FIELD_PREP(DIS_REM_CC, 1)); From e54a2f1ed15fcf62b5d7d7d77af9c4b034b70fbb Mon Sep 17 00:00:00 2001 From: David Wu Date: Thu, 3 Nov 2022 10:47:48 +0800 Subject: [PATCH 55/79] net: ethernet: stmmac: Add uio support for stmmac Currently only a single channel is supported, and the network interfaces need to be named eth0 and eth1. 
Signed-off-by: David Wu Change-Id: I19975b10e2ed12931edc2e8bd50c003416a1109c --- drivers/net/ethernet/stmicro/stmmac/Kconfig | 7 + drivers/net/ethernet/stmicro/stmmac/Makefile | 1 + drivers/net/ethernet/stmicro/stmmac/common.h | 8 + .../net/ethernet/stmicro/stmmac/stmmac_mdio.c | 1 + .../net/ethernet/stmicro/stmmac/stmmac_uio.c | 1050 +++++++++++++++++ 5 files changed, 1067 insertions(+) create mode 100644 drivers/net/ethernet/stmicro/stmmac/stmmac_uio.c diff --git a/drivers/net/ethernet/stmicro/stmmac/Kconfig b/drivers/net/ethernet/stmicro/stmmac/Kconfig index 38039c9c1e60..d6cbbac56b06 100644 --- a/drivers/net/ethernet/stmicro/stmmac/Kconfig +++ b/drivers/net/ethernet/stmicro/stmmac/Kconfig @@ -25,6 +25,13 @@ config STMMAC_SELFTESTS feature if you are facing problems with your HW and submit the test results to the netdev Mailing List. +config STMMAC_UIO + tristate "STMMAC_UIO ethernet controller" + default n + select UIO + help + Say M here if you want to use the stmmac_uio.ko for DPDK. 
+ config STMMAC_ETHTOOL bool "Ethtool feature for STMMAC" default STMMAC_ETH if !ROCKCHIP_MINI_KERNEL diff --git a/drivers/net/ethernet/stmicro/stmmac/Makefile b/drivers/net/ethernet/stmicro/stmmac/Makefile index 05c792eb473f..d017cbca223e 100644 --- a/drivers/net/ethernet/stmicro/stmmac/Makefile +++ b/drivers/net/ethernet/stmicro/stmmac/Makefile @@ -27,6 +27,7 @@ obj-$(CONFIG_DWMAC_QCOM_ETHQOS) += dwmac-qcom-ethqos.o obj-$(CONFIG_DWMAC_ROCKCHIP) += dwmac-rockchip.o dwmac-rockchip-objs := dwmac-rk.o dwmac-rockchip-$(CONFIG_DWMAC_ROCKCHIP_TOOL) += dwmac-rk-tool.o +obj-$(CONFIG_STMMAC_UIO) += stmmac_uio.o obj-$(CONFIG_DWMAC_SOCFPGA) += dwmac-altr-socfpga.o obj-$(CONFIG_DWMAC_STI) += dwmac-sti.o obj-$(CONFIG_DWMAC_STM32) += dwmac-stm32.o diff --git a/drivers/net/ethernet/stmicro/stmmac/common.h b/drivers/net/ethernet/stmicro/stmmac/common.h index df7de50497a0..37658d7bc752 100644 --- a/drivers/net/ethernet/stmicro/stmmac/common.h +++ b/drivers/net/ethernet/stmicro/stmmac/common.h @@ -48,10 +48,18 @@ */ #define DMA_MIN_TX_SIZE 64 #define DMA_MAX_TX_SIZE 1024 +#if IS_ENABLED(CONFIG_STMMAC_UIO) +#define DMA_DEFAULT_TX_SIZE 1024 +#else #define DMA_DEFAULT_TX_SIZE 512 +#endif #define DMA_MIN_RX_SIZE 64 #define DMA_MAX_RX_SIZE 1024 +#if IS_ENABLED(CONFIG_STMMAC_UIO) +#define DMA_DEFAULT_RX_SIZE 1024 +#else #define DMA_DEFAULT_RX_SIZE 512 +#endif #define STMMAC_GET_ENTRY(x, size) ((x + 1) & (size - 1)) #undef FRAME_FILTER_DEBUG diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c index 7c1a14b256da..8a2992189972 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c @@ -396,6 +396,7 @@ int stmmac_mdio_reset(struct mii_bus *bus) #endif return 0; } +EXPORT_SYMBOL(stmmac_mdio_reset); /** * stmmac_mdio_register diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_uio.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_uio.c new file mode 100644 index 
000000000000..11ec4b787da4 --- /dev/null +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_uio.c @@ -0,0 +1,1050 @@ +// SPDX-License-Identifier: GPL-2.0 +/** + * Copyright 2023 ROCKCHIP + */ + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#ifdef CONFIG_DEBUG_FS +#include +#include +#endif /* CONFIG_DEBUG_FS */ +#include +#include +#include +#include "stmmac_ptp.h" +#include "stmmac.h" +#include +#include +#include "dwmac1000.h" +#include "dwxgmac2.h" +#include "hwif.h" +#include "mmc.h" + +#define DRIVER_NAME "rockchip_gmac_uio_drv" +#define DRIVER_VERSION "0.1" + +#define TC_DEFAULT 64 +static int tc = TC_DEFAULT; + +#define DEFAULT_BUFSIZE 1536 +static int buf_sz = DEFAULT_BUFSIZE; + +#define STMMAC_RX_COPYBREAK 256 + +/** + * rockchip_gmac_uio_pdev_info + * local information for uio module driver + * + * @dev: device pointer + * @ndev: network device pointer + * @name: uio name + * @uio: uio information + * @map_num: number of uio memory regions + */ +struct rockchip_gmac_uio_pdev_info { + struct device *dev; + struct net_device *ndev; + char name[16]; + struct uio_info uio; + int map_num; +}; + +static int rockchip_gmac_uio_open(struct uio_info *info, struct inode *inode) +{ + return 0; +} + +static int rockchip_gmac_uio_release(struct uio_info *info, + struct inode *inode) +{ + return 0; +} + +static int rockchip_gmac_uio_mmap(struct uio_info *info, + struct vm_area_struct *vma) +{ + u32 ret; + u32 pfn; + + pfn = (info->mem[vma->vm_pgoff].addr) >> PAGE_SHIFT; + + if (vma->vm_pgoff) + vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); + else + vma->vm_page_prot = pgprot_device(vma->vm_page_prot); + + ret = remap_pfn_range(vma, vma->vm_start, pfn, + vma->vm_end - vma->vm_start, vma->vm_page_prot); + if (ret) { + /* Error Handle */ + pr_err("remap_pfn_range failed"); + } + return ret; 
+} + +/** + * uio_free_dma_rx_desc_resources - free RX dma desc resources + * @priv: private structure + */ +static void uio_free_dma_rx_desc_resources(struct stmmac_priv *priv) +{ + u32 rx_count = priv->plat->rx_queues_to_use; + u32 queue; + + /* Free RX queue resources */ + for (queue = 0; queue < rx_count; queue++) { + struct stmmac_rx_queue *rx_q = &priv->rx_queue[queue]; + + /* Free DMA regions of consistent memory previously allocated */ + if (!priv->extend_desc) + dma_free_coherent(priv->device, priv->dma_rx_size * + sizeof(struct dma_desc), + rx_q->dma_rx, rx_q->dma_rx_phy); + else + dma_free_coherent(priv->device, priv->dma_rx_size * + sizeof(struct dma_extended_desc), + rx_q->dma_erx, rx_q->dma_rx_phy); + } +} + +/** + * uio_free_dma_tx_desc_resources - free TX dma desc resources + * @priv: private structure + */ +static void uio_free_dma_tx_desc_resources(struct stmmac_priv *priv) +{ + u32 tx_count = priv->plat->tx_queues_to_use; + u32 queue; + + /* Free TX queue resources */ + for (queue = 0; queue < tx_count; queue++) { + struct stmmac_tx_queue *tx_q = &priv->tx_queue[queue]; + size_t size; + void *addr; + + if (priv->extend_desc) { + size = sizeof(struct dma_extended_desc); + addr = tx_q->dma_etx; + } else if (tx_q->tbs & STMMAC_TBS_AVAIL) { + size = sizeof(struct dma_edesc); + addr = tx_q->dma_entx; + } else { + size = sizeof(struct dma_desc); + addr = tx_q->dma_tx; + } + + size *= priv->dma_tx_size; + + dma_free_coherent(priv->device, size, addr, tx_q->dma_tx_phy); + } +} + +/** + * uio_alloc_dma_rx_desc_resources - alloc RX resources. + * @priv: private structure + * Description: according to which descriptor can be used (extend or basic) + * this function allocates one coherent DMA descriptor ring per RX queue. + * Only the descriptor rings are allocated here; no RX socket buffers are + * pre-allocated. 
+ */ +static int uio_alloc_dma_rx_desc_resources(struct stmmac_priv *priv) +{ + u32 rx_count = priv->plat->rx_queues_to_use; + int ret = -ENOMEM; + u32 queue; + + /* RX queues buffers and DMA */ + for (queue = 0; queue < rx_count; queue++) { + struct stmmac_rx_queue *rx_q = &priv->rx_queue[queue]; + + if (priv->extend_desc) { + rx_q->dma_erx = dma_alloc_coherent(priv->device, + priv->dma_rx_size * + sizeof(struct dma_extended_desc), + &rx_q->dma_rx_phy, + GFP_KERNEL); + if (!rx_q->dma_erx) + goto err_dma; + } else { + rx_q->dma_rx = dma_alloc_coherent(priv->device, + priv->dma_rx_size * + sizeof(struct dma_desc), + &rx_q->dma_rx_phy, + GFP_KERNEL); + if (!rx_q->dma_rx) + goto err_dma; + } + } + + return 0; + +err_dma: + uio_free_dma_rx_desc_resources(priv); + + return ret; +} + +/** + * uio_alloc_dma_tx_desc_resources - alloc TX resources. + * @priv: private structure + * Description: according to which descriptor can be used (extend, TBS or + * basic) this function allocates one coherent DMA descriptor ring per TX + * queue and stores the queue index and the private structure pointer in + * each queue. No transmit buffers are allocated here. 
+ */ +static int uio_alloc_dma_tx_desc_resources(struct stmmac_priv *priv) +{ + u32 tx_count = priv->plat->tx_queues_to_use; + int ret = -ENOMEM; + u32 queue; + + /* TX queues buffers and DMA */ + for (queue = 0; queue < tx_count; queue++) { + struct stmmac_tx_queue *tx_q = &priv->tx_queue[queue]; + size_t size; + void *addr; + + tx_q->queue_index = queue; + tx_q->priv_data = priv; + + if (priv->extend_desc) + size = sizeof(struct dma_extended_desc); + else if (tx_q->tbs & STMMAC_TBS_AVAIL) + size = sizeof(struct dma_edesc); + else + size = sizeof(struct dma_desc); + + size *= priv->dma_tx_size; + + addr = dma_alloc_coherent(priv->device, size, + &tx_q->dma_tx_phy, GFP_KERNEL); + if (!addr) + goto err_dma; + + if (priv->extend_desc) + tx_q->dma_etx = addr; + else if (tx_q->tbs & STMMAC_TBS_AVAIL) + tx_q->dma_entx = addr; + else + tx_q->dma_tx = addr; + } + + return 0; + +err_dma: + uio_free_dma_tx_desc_resources(priv); + return ret; +} + +/** + * uio_alloc_dma_desc_resources - alloc TX/RX resources. + * @priv: private structure + * Description: allocates the DMA descriptor rings for the RX path and then + * for the TX path; no RX socket buffers are pre-allocated. If the RX + * allocation fails its error is returned at once. The RX rings are not + * released here when the TX allocation fails. 
+ */ +static int uio_alloc_dma_desc_resources(struct stmmac_priv *priv) +{ + /* RX Allocation */ + int ret = uio_alloc_dma_rx_desc_resources(priv); + + if (ret) + return ret; + + ret = uio_alloc_dma_tx_desc_resources(priv); + + return ret; +} + +/** + * uio_free_dma_desc_resources - free dma desc resources + * @priv: private structure + */ +static void uio_free_dma_desc_resources(struct stmmac_priv *priv) +{ + /* Release the DMA RX socket buffers */ + uio_free_dma_rx_desc_resources(priv); + + /* Release the DMA TX socket buffers */ + uio_free_dma_tx_desc_resources(priv); +} + +/** + * rockchip_gmac_uio_init_phy - PHY initialization + * @dev: net device structure + * Description: it initializes the driver's PHY state, and attaches the PHY + * to the mac driver. + * Return value: + * 0 on success + */ +static int rockchip_gmac_uio_init_phy(struct net_device *dev) +{ + struct stmmac_priv *priv = netdev_priv(dev); + struct device_node *node; + int ret; + + node = priv->plat->phylink_node; + + if (node) + ret = phylink_of_phy_connect(priv->phylink, node, 0); + + /* Some DT bindings do not set-up the PHY handle. Let's try to + * manually parse it + */ + if (!node || ret) { + int addr = priv->plat->phy_addr; + struct phy_device *phydev; + + phydev = mdiobus_get_phy(priv->mii, addr); + if (!phydev) { + netdev_err(priv->dev, "no phy at addr %d\n", addr); + return -ENODEV; + } + + ret = phylink_connect_phy(priv->phylink, phydev); + } + + if (!priv->plat->pmt) { + struct ethtool_wolinfo wol = { .cmd = ETHTOOL_GWOL }; + + phylink_ethtool_get_wol(priv->phylink, &wol); + device_set_wakeup_capable(priv->device, !!wol.supported); + } + + return ret; +} + +/** + * rockchip_gmac_uio_init_dma_engine - DMA init. + * @priv: driver private structure + * Description: + * It inits the DMA invoking the specific MAC/GMAC callback. + * Some DMA parameters can be passed from the platform; + * in case of these are not passed a default is kept for the MAC or GMAC. 
+ */ +static int rockchip_gmac_uio_init_dma_engine(struct stmmac_priv *priv) +{ + u32 rx_channels_count = priv->plat->rx_queues_to_use; + u32 tx_channels_count = priv->plat->tx_queues_to_use; + u32 dma_csr_ch = max(rx_channels_count, tx_channels_count); + struct stmmac_rx_queue *rx_q; + struct stmmac_tx_queue *tx_q; + u32 chan = 0; + int atds = 0; + int ret = 0; + + if (!priv->plat->dma_cfg || !priv->plat->dma_cfg->pbl) { + dev_err(priv->device, "Invalid DMA configuration\n"); + return -EINVAL; + } + + if (priv->extend_desc && priv->mode == STMMAC_RING_MODE) + atds = 1; + + ret = stmmac_reset(priv, priv->ioaddr); + if (ret) { + dev_err(priv->device, "Failed to reset the dma\n"); + return ret; + } + + /* DMA Configuration */ + stmmac_dma_init(priv, priv->ioaddr, priv->plat->dma_cfg, atds); + + if (priv->plat->axi) + stmmac_axi(priv, priv->ioaddr, priv->plat->axi); + + /* DMA CSR Channel configuration */ + for (chan = 0; chan < dma_csr_ch; chan++) + stmmac_init_chan(priv, priv->ioaddr, priv->plat->dma_cfg, chan); + + /* DMA RX Channel Configuration */ + for (chan = 0; chan < rx_channels_count; chan++) { + rx_q = &priv->rx_queue[chan]; + + stmmac_init_rx_chan(priv, priv->ioaddr, priv->plat->dma_cfg, + rx_q->dma_rx_phy, chan); + + rx_q->rx_tail_addr = rx_q->dma_rx_phy + + (priv->dma_rx_size * + sizeof(struct dma_desc)); + stmmac_set_rx_tail_ptr(priv, priv->ioaddr, + rx_q->rx_tail_addr, chan); + } + + /* DMA TX Channel Configuration */ + for (chan = 0; chan < tx_channels_count; chan++) { + tx_q = &priv->tx_queue[chan]; + + stmmac_init_tx_chan(priv, priv->ioaddr, priv->plat->dma_cfg, + tx_q->dma_tx_phy, chan); + + tx_q->tx_tail_addr = tx_q->dma_tx_phy; + stmmac_set_tx_tail_ptr(priv, priv->ioaddr, + tx_q->tx_tail_addr, chan); + } + + return ret; +} + +static void uio_set_rings_length(struct stmmac_priv *priv) +{ + u32 rx_channels_count = priv->plat->rx_queues_to_use; + u32 tx_channels_count = priv->plat->tx_queues_to_use; + u32 chan; + + /* set TX ring length */ + for 
(chan = 0; chan < tx_channels_count; chan++) + stmmac_set_tx_ring_len(priv, priv->ioaddr, + (priv->dma_tx_size - 1), chan); + + /* set RX ring length */ + for (chan = 0; chan < rx_channels_count; chan++) + stmmac_set_rx_ring_len(priv, priv->ioaddr, + (priv->dma_rx_size - 1), chan); +} + +/** + * uio_set_tx_queue_weight - Set TX queue weight + * @priv: driver private structure + * Description: It is used for setting TX queues weight + */ +static void uio_set_tx_queue_weight(struct stmmac_priv *priv) +{ + u32 tx_queues_count = priv->plat->tx_queues_to_use; + u32 weight; + u32 queue; + + for (queue = 0; queue < tx_queues_count; queue++) { + weight = priv->plat->tx_queues_cfg[queue].weight; + stmmac_set_mtl_tx_queue_weight(priv, priv->hw, weight, queue); + } +} + +/** + * uio_configure_cbs - Configure CBS in TX queue + * @priv: driver private structure + * Description: It is used for configuring CBS in AVB TX queues + */ +static void uio_configure_cbs(struct stmmac_priv *priv) +{ + u32 tx_queues_count = priv->plat->tx_queues_to_use; + u32 mode_to_use; + u32 queue; + + /* queue 0 is reserved for legacy traffic */ + for (queue = 1; queue < tx_queues_count; queue++) { + mode_to_use = priv->plat->tx_queues_cfg[queue].mode_to_use; + if (mode_to_use == MTL_QUEUE_DCB) + continue; + + stmmac_config_cbs(priv, priv->hw, + priv->plat->tx_queues_cfg[queue].send_slope, + priv->plat->tx_queues_cfg[queue].idle_slope, + priv->plat->tx_queues_cfg[queue].high_credit, + priv->plat->tx_queues_cfg[queue].low_credit, + queue); + } +} + +/** + * uio_rx_queue_dma_chan_map - Map RX queue to RX dma channel + * @priv: driver private structure + * Description: It is used for mapping RX queues to RX dma channels + */ +static void uio_rx_queue_dma_chan_map(struct stmmac_priv *priv) +{ + u32 rx_queues_count = priv->plat->rx_queues_to_use; + u32 queue; + u32 chan; + + for (queue = 0; queue < rx_queues_count; queue++) { + chan = priv->plat->rx_queues_cfg[queue].chan; + stmmac_map_mtl_to_dma(priv, 
priv->hw, queue, chan); + } +} + +/** + * uio_mac_config_rx_queues_prio - Configure RX Queue priority + * @priv: driver private structure + * Description: It is used for configuring the RX Queue Priority + */ +static void uio_mac_config_rx_queues_prio(struct stmmac_priv *priv) +{ + u32 rx_queues_count = priv->plat->rx_queues_to_use; + u32 queue; + u32 prio; + + for (queue = 0; queue < rx_queues_count; queue++) { + if (!priv->plat->rx_queues_cfg[queue].use_prio) + continue; + + prio = priv->plat->rx_queues_cfg[queue].prio; + stmmac_rx_queue_prio(priv, priv->hw, prio, queue); + } +} + +/** + * uio_mac_config_tx_queues_prio - Configure TX Queue priority + * @priv: driver private structure + * Description: It is used for configuring the TX Queue Priority + */ +static void uio_mac_config_tx_queues_prio(struct stmmac_priv *priv) +{ + u32 tx_queues_count = priv->plat->tx_queues_to_use; + u32 queue; + u32 prio; + + for (queue = 0; queue < tx_queues_count; queue++) { + if (!priv->plat->tx_queues_cfg[queue].use_prio) + continue; + + prio = priv->plat->tx_queues_cfg[queue].prio; + stmmac_tx_queue_prio(priv, priv->hw, prio, queue); + } +} + +/** + * uio_mac_config_rx_queues_routing - Configure RX Queue Routing + * @priv: driver private structure + * Description: It is used for configuring the RX queue routing + */ +static void uio_mac_config_rx_queues_routing(struct stmmac_priv *priv) +{ + u32 rx_queues_count = priv->plat->rx_queues_to_use; + u32 queue; + u8 packet; + + for (queue = 0; queue < rx_queues_count; queue++) { + /* no specific packet type routing specified for the queue */ + if (priv->plat->rx_queues_cfg[queue].pkt_route == 0x0) + continue; + + packet = priv->plat->rx_queues_cfg[queue].pkt_route; + stmmac_rx_queue_routing(priv, priv->hw, packet, queue); + } +} + +static void uio_mac_config_rss(struct stmmac_priv *priv) +{ + if (!priv->dma_cap.rssen || !priv->plat->rss_en) { + priv->rss.enable = false; + return; + } + + if (priv->dev->features & NETIF_F_RXHASH) + 
priv->rss.enable = true; + else + priv->rss.enable = false; + + stmmac_rss_configure(priv, priv->hw, &priv->rss, + priv->plat->rx_queues_to_use); +} + +/** + * uio_mac_enable_rx_queues - Enable MAC rx queues + * @priv: driver private structure + * Description: It is used for enabling the rx queues in the MAC + */ +static void uio_mac_enable_rx_queues(struct stmmac_priv *priv) +{ + u32 rx_queues_count = priv->plat->rx_queues_to_use; + int queue; + u8 mode; + + for (queue = 0; queue < rx_queues_count; queue++) { + mode = priv->plat->rx_queues_cfg[queue].mode_to_use; + stmmac_rx_queue_enable(priv, priv->hw, mode, queue); + } +} + +/** + * rockchip_gmac_uio_mtl_configuration - Configure MTL + * @priv: driver private structure + * Description: It is used for configuring MTL + */ +static void rockchip_gmac_uio_mtl_configuration(struct stmmac_priv *priv) +{ + u32 rx_queues_count = priv->plat->rx_queues_to_use; + u32 tx_queues_count = priv->plat->tx_queues_to_use; + + if (tx_queues_count > 1) + uio_set_tx_queue_weight(priv); + + /* Configure MTL RX algorithms */ + if (rx_queues_count > 1) + stmmac_prog_mtl_rx_algorithms(priv, priv->hw, + priv->plat->rx_sched_algorithm); + + /* Configure MTL TX algorithms */ + if (tx_queues_count > 1) + stmmac_prog_mtl_tx_algorithms(priv, priv->hw, + priv->plat->tx_sched_algorithm); + + /* Configure CBS in AVB TX queues */ + if (tx_queues_count > 1) + uio_configure_cbs(priv); + + /* Map RX MTL to DMA channels */ + uio_rx_queue_dma_chan_map(priv); + + /* Enable MAC RX Queues */ + uio_mac_enable_rx_queues(priv); + + /* Set RX priorities */ + if (rx_queues_count > 1) + uio_mac_config_rx_queues_prio(priv); + + /* Set TX priorities */ + if (tx_queues_count > 1) + uio_mac_config_tx_queues_prio(priv); + + /* Set RX routing */ + if (rx_queues_count > 1) + uio_mac_config_rx_queues_routing(priv); + + /* Receive Side Scaling */ + if (rx_queues_count > 1) + uio_mac_config_rss(priv); +} + +static void uio_safety_feat_configuration(struct stmmac_priv 
*priv) +{ + if (priv->dma_cap.asp) { + netdev_info(priv->dev, "Enabling Safety Features\n"); + stmmac_safety_feat_config(priv, priv->ioaddr, priv->dma_cap.asp); + } else { + netdev_info(priv->dev, "No Safety Features support found\n"); + } +} + +/** + * uio_dma_operation_mode - HW DMA operation mode + * @priv: driver private structure + * Description: it is used for configuring the DMA operation mode register in + * order to program the tx/rx DMA thresholds or Store-And-Forward mode. + */ +static void uio_dma_operation_mode(struct stmmac_priv *priv) +{ + u32 rx_channels_count = priv->plat->rx_queues_to_use; + u32 tx_channels_count = priv->plat->tx_queues_to_use; + int rxfifosz = priv->plat->rx_fifo_size; + int txfifosz = priv->plat->tx_fifo_size; + u32 txmode = 0; + u32 rxmode = 0; + u32 chan = 0; + u8 qmode = 0; + + if (rxfifosz == 0) + rxfifosz = priv->dma_cap.rx_fifo_size; + if (txfifosz == 0) + txfifosz = priv->dma_cap.tx_fifo_size; + + /* Adjust for real per queue fifo size */ + rxfifosz /= rx_channels_count; + txfifosz /= tx_channels_count; + + if (priv->plat->force_thresh_dma_mode) { + txmode = tc; + rxmode = tc; + } else if (priv->plat->force_sf_dma_mode || priv->plat->tx_coe) { + /* In case of GMAC, SF mode can be enabled + * to perform the TX COE in HW. This depends on: + * 1) TX COE if actually supported + * 2) There is no bugged Jumbo frame support + * that needs to not insert csum in the TDES. 
+ */ + txmode = SF_DMA_MODE; + rxmode = SF_DMA_MODE; + priv->xstats.threshold = SF_DMA_MODE; + } else { + txmode = tc; + rxmode = SF_DMA_MODE; + } + + /* configure all channels */ + for (chan = 0; chan < rx_channels_count; chan++) { + qmode = priv->plat->rx_queues_cfg[chan].mode_to_use; + + stmmac_dma_rx_mode(priv, priv->ioaddr, rxmode, chan, + rxfifosz, qmode); + stmmac_set_dma_bfsize(priv, priv->ioaddr, priv->dma_buf_sz, + chan); + } + + for (chan = 0; chan < tx_channels_count; chan++) { + qmode = priv->plat->tx_queues_cfg[chan].mode_to_use; + + stmmac_dma_tx_mode(priv, priv->ioaddr, txmode, chan, + txfifosz, qmode); + } +} + +/** + * rockchip_gmac_uio_hw_setup - setup mac in a usable state. + * @dev : pointer to the device structure. + * @init_ptp: initialize PTP if set + * Description: + * this is the main function to setup the HW in a usable state because the + * dma engine is reset, the core registers are configured (e.g. AXI, + * Checksum features, timers). The DMA is ready to start receiving and + * transmitting. + * Return value: + * 0 on success and an appropriate (-)ve integer as defined in errno.h + * file on failure. 
+ */ +static int rockchip_gmac_uio_hw_setup(struct net_device *dev, bool init_ptp) +{ + struct stmmac_priv *priv = netdev_priv(dev); + int ret; + + /* DMA initialization and SW reset */ + ret = rockchip_gmac_uio_init_dma_engine(priv); + if (ret < 0) { + netdev_err(priv->dev, "%s: DMA engine initialization failed\n", + __func__); + return ret; + } + + /* Copy the MAC addr into the HW */ + stmmac_set_umac_addr(priv, priv->hw, dev->dev_addr, 0); + + /* PS and related bits will be programmed according to the speed */ + if (priv->hw->pcs) { + int speed = priv->plat->mac_port_sel_speed; + + if (speed == SPEED_10 || speed == SPEED_100 || + speed == SPEED_1000) { + priv->hw->ps = speed; + } else { + dev_warn(priv->device, "invalid port speed\n"); + priv->hw->ps = 0; + } + } + + /* Initialize the MAC Core */ + stmmac_core_init(priv, priv->hw, dev); + + /* Initialize MTL*/ + rockchip_gmac_uio_mtl_configuration(priv); + + /* Initialize Safety Features */ + uio_safety_feat_configuration(priv); + + ret = stmmac_rx_ipc(priv, priv->hw); + if (!ret) { + netdev_warn(priv->dev, "RX IPC Checksum Offload disabled\n"); + priv->plat->rx_coe = STMMAC_RX_COE_NONE; + priv->hw->rx_csum = 0; + } + + /* Enable the MAC Rx/Tx */ + stmmac_mac_set(priv, priv->ioaddr, true); + + /* Set the HW DMA mode and the COE */ + uio_dma_operation_mode(priv); + + if (priv->hw->pcs) + stmmac_pcs_ctrl_ane(priv, priv->hw, 1, priv->hw->ps, 0); + + /* set TX and RX rings length */ + uio_set_rings_length(priv); + + return 0; +} + +static int uio_set_bfsize(int mtu, int bufsize) +{ + int ret = bufsize; + + if (mtu >= BUF_SIZE_8KiB) + ret = BUF_SIZE_16KiB; + else if (mtu >= BUF_SIZE_4KiB) + ret = BUF_SIZE_8KiB; + else if (mtu >= BUF_SIZE_2KiB) + ret = BUF_SIZE_4KiB; + else if (mtu > DEFAULT_BUFSIZE) + ret = BUF_SIZE_2KiB; + else + ret = DEFAULT_BUFSIZE; + + return ret; +} + +/** + * uio_open - open entry point of the driver + * @dev : pointer to the device structure. 
+ * Description: + * This function is the open entry point of the driver. + * Return value: + * 0 on success and an appropriate (-)ve integer as defined in errno.h + * file on failure. + */ +static int uio_open(struct net_device *dev) +{ + struct stmmac_priv *priv = netdev_priv(dev); + int bfsize = 0; + int ret; + + if (priv->hw->pcs != STMMAC_PCS_TBI && + priv->hw->pcs != STMMAC_PCS_RTBI && + !priv->hw->xpcs) { + ret = rockchip_gmac_uio_init_phy(dev); + if (ret) { + netdev_err(priv->dev, + "%s: Cannot attach to PHY (error: %d)\n", + __func__, ret); + return ret; + } + } + + /* Extra statistics */ + priv->xstats.threshold = tc; + + bfsize = stmmac_set_16kib_bfsize(priv, dev->mtu); + if (bfsize < 0) + bfsize = 0; + + if (bfsize < BUF_SIZE_16KiB) + bfsize = uio_set_bfsize(dev->mtu, priv->dma_buf_sz); + + priv->dma_buf_sz = bfsize; + buf_sz = bfsize; + + priv->rx_copybreak = STMMAC_RX_COPYBREAK; + + if (!priv->dma_tx_size) + priv->dma_tx_size = DMA_DEFAULT_TX_SIZE; + if (!priv->dma_rx_size) + priv->dma_rx_size = DMA_DEFAULT_RX_SIZE; + + ret = uio_alloc_dma_desc_resources(priv); + if (ret < 0) { + netdev_err(priv->dev, "%s: DMA descriptors allocation failed\n", + __func__); + goto dma_desc_error; + } + + ret = rockchip_gmac_uio_hw_setup(dev, true); + if (ret < 0) { + netdev_err(priv->dev, "%s: Hw setup failed\n", __func__); + goto init_error; + } + + phylink_start(priv->phylink); + /* We may have called phylink_speed_down before */ + phylink_speed_up(priv->phylink); + + return 0; + +init_error: + uio_free_dma_desc_resources(priv); +dma_desc_error: + phylink_disconnect_phy(priv->phylink); + return ret; +} + +/** + * uio_release - close entry point of the driver + * @dev : device pointer. + * Description: + * This is the stop entry point of the driver. 
+ */ +static int uio_release(struct net_device *dev) +{ + struct stmmac_priv *priv = netdev_priv(dev); + + /* Stop and disconnect the PHY */ + if (dev->phydev) { + phy_stop(dev->phydev); + phy_disconnect(dev->phydev); + if (priv->plat->integrated_phy_power) + priv->plat->integrated_phy_power(priv->plat->bsp_priv, + false); + } + + /* Release and free the Rx/Tx resources */ + uio_free_dma_desc_resources(priv); + + /* Disable the MAC Rx/Tx */ + stmmac_mac_set(priv, priv->ioaddr, false); + + netif_carrier_off(dev); + + return 0; +} + +/** + * rockchip_gmac_uio_probe() - platform driver probe routine + * Closes the MAC named by "rockchip,ethernet", re-opens it for UIO use and + * registers a uio device exposing its registers and queue 0 Rx/Tx rings. + */ +static int rockchip_gmac_uio_probe(struct platform_device *pdev) +{ + struct device *dev = &pdev->dev; + struct device_node *np = dev->of_node, *mac_node; + struct rockchip_gmac_uio_pdev_info *pdev_info; + struct net_device *netdev; + struct stmmac_priv *priv; + struct uio_info *uio; + struct resource *res; + int err = 0; + + pdev_info = devm_kzalloc(dev, sizeof(*pdev_info), GFP_KERNEL); + if (!pdev_info) + return -ENOMEM; + + uio = &pdev_info->uio; + pdev_info->dev = dev; + mac_node = of_parse_phandle(np, "rockchip,ethernet", 0); + if (!mac_node) + return -ENODEV; + + if (of_device_is_available(mac_node)) { + netdev = of_find_net_device_by_node(mac_node); + of_node_put(mac_node); + if (!netdev) + return -ENODEV; + } else { + of_node_put(mac_node); + return -EINVAL; + } + + /* Look up the register window first: failing here needs no unwinding */ + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + if (!res) + return -ENODEV; + + pdev_info->ndev = netdev; + rtnl_lock(); + dev_close(netdev); + err = uio_open(netdev); + if (err) { + rtnl_unlock(); + dev_err(dev, "Failed to open stmmac resource: %d\n", err); + return err; + } + rtnl_unlock(); + + priv = netdev_priv(netdev); + snprintf(pdev_info->name, sizeof(pdev_info->name), "uio_%s", + netdev->name); + uio->name = pdev_info->name; + uio->version = DRIVER_VERSION; +
+ uio->mem[0].name = "eth_regs"; + uio->mem[0].addr = res->start & PAGE_MASK; + uio->mem[0].size = PAGE_ALIGN(resource_size(res)); + uio->mem[0].memtype = UIO_MEM_PHYS; + + uio->mem[1].name = "eth_rx_bd"; + uio->mem[1].addr = priv->rx_queue[0].dma_rx_phy; + uio->mem[1].size = priv->dma_rx_size * sizeof(struct dma_desc); + uio->mem[1].memtype = UIO_MEM_PHYS; + + uio->mem[2].name = "eth_tx_bd"; + uio->mem[2].addr = priv->tx_queue[0].dma_tx_phy; + uio->mem[2].size = priv->dma_tx_size * sizeof(struct dma_desc); + uio->mem[2].memtype = UIO_MEM_PHYS; + + uio->open = rockchip_gmac_uio_open; + uio->release = rockchip_gmac_uio_release; + /* Custom mmap function. */ + uio->mmap = rockchip_gmac_uio_mmap; + uio->priv = pdev_info; + + err = uio_register_device(dev, uio); + if (err) { + dev_err(dev, "Failed to register uio device: %d\n", err); + rtnl_lock(); + uio_release(netdev); /* undo uio_open() */ + rtnl_unlock(); + return err; + } + + pdev_info->map_num = 3; + + dev_info(dev, "Registered %s uio devices, %d register maps attached\n", + pdev_info->name, pdev_info->map_num); + + platform_set_drvdata(pdev, pdev_info); + + return 0; +} + +/** + * rockchip_gmac_uio_remove() - ROCKCHIP ETH UIO platform driver release + * routine - unregister uio devices + */ +static int rockchip_gmac_uio_remove(struct platform_device *pdev) +{ + struct rockchip_gmac_uio_pdev_info *pdev_info = + platform_get_drvdata(pdev); + struct net_device *netdev; + + if (!pdev_info) + return -EINVAL; + + netdev = pdev_info->ndev; + + uio_unregister_device(&pdev_info->uio); + + if (netdev) { + rtnl_lock(); + uio_release(netdev); + rtnl_unlock(); + } + + platform_set_drvdata(pdev, NULL); + + if (netdev) { + rtnl_lock(); + dev_open(netdev, NULL); + rtnl_unlock(); + } + + return 0; +} + +static const struct of_device_id rockchip_gmac_uio_of_match[] = { + { .compatible = "rockchip,uio-gmac", }, + { } +}; + +static struct platform_driver rockchip_gmac_uio_driver = { + .driver = { + .owner = THIS_MODULE, + .name = DRIVER_NAME, + .of_match_table = rockchip_gmac_uio_of_match, 
+ }, + .probe = rockchip_gmac_uio_probe, + .remove = rockchip_gmac_uio_remove, +}; + +module_platform_driver(rockchip_gmac_uio_driver); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("ROCKCHIP"); +MODULE_DESCRIPTION("ROCKCHIP GMAC UIO Driver"); From b3fc1c6b9bce7cb2b6c79b7b366487f092424eff Mon Sep 17 00:00:00 2001 From: David Wu Date: Sun, 5 Mar 2023 20:43:16 +0800 Subject: [PATCH 56/79] arm64: dts: rockchip: rk3568: Add stmmac uio nodes Signed-off-by: David Wu Change-Id: I6a7d4c1000b9ed604042b5dc5d32c9cddb4ad433 --- arch/arm64/boot/dts/rockchip/rk3566.dtsi | 1 + arch/arm64/boot/dts/rockchip/rk3568.dtsi | 14 ++++++++++++++ 2 files changed, 15 insertions(+) diff --git a/arch/arm64/boot/dts/rockchip/rk3566.dtsi b/arch/arm64/boot/dts/rockchip/rk3566.dtsi index 066f13843d11..eeb394589a98 100644 --- a/arch/arm64/boot/dts/rockchip/rk3566.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3566.dtsi @@ -54,6 +54,7 @@ /delete-node/ &gmac0_clkin; /delete-node/ &gmac0_xpcsclk; /delete-node/ &gmac0; +/delete-node/ &gmac_uio0; /delete-node/ &pcie30_phy_grf; /delete-node/ &pcie30phy; /delete-node/ &pcie3x1; diff --git a/arch/arm64/boot/dts/rockchip/rk3568.dtsi b/arch/arm64/boot/dts/rockchip/rk3568.dtsi index 3e48ce788e8e..3eaa19904f16 100644 --- a/arch/arm64/boot/dts/rockchip/rk3568.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3568.dtsi @@ -1765,6 +1765,13 @@ status = "disabled"; }; + gmac_uio1: uio@fe010000 { + compatible = "rockchip,uio-gmac"; + reg = <0x0 0xfe010000 0x0 0x10000>; + rockchip,ethernet = <&gmac1>; + status = "disabled"; + }; + gmac1: ethernet@fe010000 { compatible = "rockchip,rk3568-gmac", "snps,dwmac-4.20a"; reg = <0x0 0xfe010000 0x0 0x10000>; @@ -2510,6 +2517,13 @@ }; }; + gmac_uio0: uio@fe2a0000 { + compatible = "rockchip,uio-gmac"; + reg = <0x0 0xfe2a0000 0x0 0x10000>; + rockchip,ethernet = <&gmac0>; + status = "disabled"; + }; + gmac0: ethernet@fe2a0000 { compatible = "rockchip,rk3568-gmac", "snps,dwmac-4.20a"; reg = <0x0 0xfe2a0000 0x0 0x10000>; From 
d2fa6b515150dcb51ba5ea89dfee064aa6da7c4a Mon Sep 17 00:00:00 2001 From: Yandong Lin Date: Fri, 17 Mar 2023 11:12:19 +0800 Subject: [PATCH 57/79] video: rockchip: mpp: fix bus err for px30 dec/enc When encoding and decoding in parallel, the iommu switch fails and causes a bus error. rk_iommu ff442800.iommu: BUS_ERROR occurred at 0x0000000000000000 Root cause: px30_workaround_combo_init reads an invalid dte addr because the iommu is not enabled at that point. Solution: read the dte addr while the iommu is enabled. Signed-off-by: Yandong Lin Change-Id: I4b84c650a6e132928a6dc91a26985abd8c273cfe --- drivers/video/rockchip/mpp/hack/mpp_hack_px30.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/drivers/video/rockchip/mpp/hack/mpp_hack_px30.c b/drivers/video/rockchip/mpp/hack/mpp_hack_px30.c index 379049eaf8fc..07912bcc3436 100644 --- a/drivers/video/rockchip/mpp/hack/mpp_hack_px30.c +++ b/drivers/video/rockchip/mpp/hack/mpp_hack_px30.c @@ -19,6 +19,7 @@ #include "../mpp_common.h" #include "../mpp_iommu.h" #include "mpp_hack_px30.h" +#include <soc/rockchip/rockchip_iommu.h> #define RK_MMU_DTE_ADDR 0x00 /* Directory table address */ #define RK_MMU_STATUS 0x04 @@ -177,7 +178,17 @@ int px30_workaround_combo_init(struct mpp_dev *mpp) iommu->grf_val = mpp->grf_info->val & MPP_GRF_VAL_MASK; if (mpp->hw_ops->clk_on) mpp->hw_ops->clk_on(mpp); - iommu->dte_addr = mpp_iommu_get_dte_addr(iommu); + /* + * Make sure the iommu is enabled so that a valid dte value is read + */ + if (rockchip_iommu_is_enabled(mpp->dev)) { + iommu->dte_addr = mpp_iommu_get_dte_addr(iommu); + } else { + rockchip_iommu_enable(mpp->dev); + iommu->dte_addr = mpp_iommu_get_dte_addr(iommu); + rockchip_iommu_disable(mpp->dev); + } + dev_dbg(mpp->dev, "%s dte_addr %08x\n", __func__, iommu->dte_addr); if (mpp->hw_ops->clk_off) mpp->hw_ops->clk_off(mpp); INIT_LIST_HEAD(&iommu->link); From 29686dd3c9e1d81433bdc4eecddb116dbb04e2e9 Mon Sep 17 00:00:00 2001 From: Tao Huang Date: Mon, 20 Mar 2023 20:25:02 +0800 Subject: [PATCH 58/79] arm64: dts: 
rockchip: Fix Makefile sorting for rk3588-nvr-demo-v10-ipc-4x-linux Fixes: b9ef4803ff95 ("arm64: dts: rockchip: add rk3588-nvr-demo-v10-ipc-4x-linux.dts") Signed-off-by: Tao Huang Change-Id: Ice0e586660b696cb3757998d3b5513956d9b3812 --- arch/arm64/boot/dts/rockchip/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/rockchip/Makefile b/arch/arm64/boot/dts/rockchip/Makefile index b616484db829..7a01bdbde6cc 100644 --- a/arch/arm64/boot/dts/rockchip/Makefile +++ b/arch/arm64/boot/dts/rockchip/Makefile @@ -134,7 +134,6 @@ dtb-$(CONFIG_ARCH_ROCKCHIP) += rk3568-evb7-ddr4-v10.dtb dtb-$(CONFIG_ARCH_ROCKCHIP) += rk3568-iotest-ddr3-v10.dtb dtb-$(CONFIG_ARCH_ROCKCHIP) += rk3568-iotest-ddr3-v10-linux.dtb dtb-$(CONFIG_ARCH_ROCKCHIP) += rk3568-nvr-demo-v10.dtb -dtb-$(CONFIG_ARCH_ROCKCHIP) += rk3588-nvr-demo-v10-ipc-4x-linux.dtb dtb-$(CONFIG_ARCH_ROCKCHIP) += rk3568-nvr-demo-v10-linux.dtb dtb-$(CONFIG_ARCH_ROCKCHIP) += rk3568-nvr-demo-v10-linux-spi-nand.dtb dtb-$(CONFIG_ARCH_ROCKCHIP) += rk3568-nvr-demo-v12-linux.dtb @@ -165,6 +164,7 @@ dtb-$(CONFIG_ARCH_ROCKCHIP) += rk3588-h0-v10.dtb dtb-$(CONFIG_ARCH_ROCKCHIP) += rk3588-h0-v10-linux.dtb dtb-$(CONFIG_ARCH_ROCKCHIP) += rk3588-nvr-demo-v10.dtb dtb-$(CONFIG_ARCH_ROCKCHIP) += rk3588-nvr-demo-v10-android.dtb +dtb-$(CONFIG_ARCH_ROCKCHIP) += rk3588-nvr-demo-v10-ipc-4x-linux.dtb dtb-$(CONFIG_ARCH_ROCKCHIP) += rk3588-nvr-demo-v10-spi-nand.dtb dtb-$(CONFIG_ARCH_ROCKCHIP) += rk3588-nvr-demo1-v21.dtb dtb-$(CONFIG_ARCH_ROCKCHIP) += rk3588-nvr-demo1-v21-android.dtb From 051e4d7cf0dfe747dd7983a904a7f210fc970d62 Mon Sep 17 00:00:00 2001 From: Jon Lin Date: Tue, 17 Jan 2023 16:46:16 +0800 Subject: [PATCH 59/79] PCI: rockchip: dw: Add the specification of ep information Definition rockchip ep device information, like drivers version and devices dma status. These information will be store in BAR0 in default. 
Change-Id: I33dcc00e9923d10ad63d706ce16ba646f0049480 Signed-off-by: Jon Lin --- include/uapi/linux/rk-pcie-ep.h | 47 +++++++++++++++++++++++++++++++++ 1 file changed, 47 insertions(+) diff --git a/include/uapi/linux/rk-pcie-ep.h b/include/uapi/linux/rk-pcie-ep.h index 37726a694b6f..c9e8fed4bb09 100644 --- a/include/uapi/linux/rk-pcie-ep.h +++ b/include/uapi/linux/rk-pcie-ep.h @@ -8,19 +8,66 @@ #include <linux/types.h> +/* + * rockchip pcie driver elbi ioctl output data + */ struct pcie_ep_user_data { + __u64 bar0_phys_addr; __u32 elbi_app_user[11]; }; +/* + * rockchip driver cache ioctl input param + */ struct pcie_ep_dma_cache_cfg { __u64 addr; __u32 size; }; +#define PCIE_EP_OBJ_INFO_MAGIC 0x524B4550 + +enum pcie_ep_obj_irq_type { + OBJ_IRQ_UNKNOWN, + OBJ_IRQ_DMA, + OBJ_IRQ_USER, + OBJ_IRQ_ELBI, +}; + +struct pcie_ep_obj_irq_dma_status { + __u32 wr; + __u32 rd; +}; + +enum pcie_ep_mmap_resource { + PCIE_EP_MMAP_RESOURCE_DBI, + PCIE_EP_MMAP_RESOURCE_BAR0, + PCIE_EP_MMAP_RESOURCE_BAR2, + PCIE_EP_MMAP_RESOURCE_BAR4, + PCIE_EP_MMAP_RESOURCE_MAX, +}; + +/* + * rockchip ep device information which is stored in BAR0 + */ +struct pcie_ep_obj_info { + __u32 magic; + __u32 version; + __u8 reserved[0x1F8]; + + __u32 irq_type_rc; /* Generated in ep isr, valid only for rc, cleared in rc */ + struct pcie_ep_obj_irq_dma_status dma_status_rc; /* Generated in ep isr, valid only for rc, cleared in rc */ + __u32 irq_type_ep; /* Generated in ep isr, valid only for ep, cleared in ep */ + struct pcie_ep_obj_irq_dma_status dma_status_ep; /* Generated in ep isr, valid only for ep, cleared in ep */ + __u32 obj_irq_user_data; /* OBJ_IRQ_USER userspace data */ +}; + #define PCIE_BASE 'P' #define PCIE_DMA_GET_ELBI_DATA _IOR(PCIE_BASE, 0, struct pcie_ep_user_data) #define PCIE_DMA_CACHE_INVALIDE _IOW(PCIE_BASE, 1, struct pcie_ep_dma_cache_cfg) #define PCIE_DMA_CACHE_FLUSH _IOW(PCIE_BASE, 2, struct pcie_ep_dma_cache_cfg) #define PCIE_DMA_IRQ_MASK_ALL _IOW(PCIE_BASE, 3, int) +#define PCIE_DMA_RAISE_MSI_OBJ_IRQ_USER 
_IOW(PCIE_BASE, 4, int) +#define PCIE_EP_GET_USER_INFO _IOR(PCIE_BASE, 5, struct pcie_ep_user_data) +#define PCIE_EP_SET_MMAP_RESOURCE _IOW(PCIE_BASE, 6, enum pcie_ep_mmap_resource) #endif From 7152ea12b207a13e7b8d2a0c58a7f239393fda72 Mon Sep 17 00:00:00 2001 From: Jon Lin Date: Tue, 17 Jan 2023 17:05:03 +0800 Subject: [PATCH 60/79] PCI: rockchip: dw: Support ep information 1.Support ep information, including sending ep dma status to rc by using msi 2.Set ep drivers version 0x00000001 3.Support PCIE_DMA_RAISE_MSI_OBJ_IRQ_USER ioctl Change-Id: I9c1530a1ce8289ce324b78a9dd5fa0cb6a5a1858 Signed-off-by: Jon Lin --- .../pci/controller/dwc/pcie-dw-ep-rockchip.c | 131 ++++++++++++++---- 1 file changed, 106 insertions(+), 25 deletions(-) diff --git a/drivers/pci/controller/dwc/pcie-dw-ep-rockchip.c b/drivers/pci/controller/dwc/pcie-dw-ep-rockchip.c index 943b23d2c35b..ce506ae61e07 100644 --- a/drivers/pci/controller/dwc/pcie-dw-ep-rockchip.c +++ b/drivers/pci/controller/dwc/pcie-dw-ep-rockchip.c @@ -82,6 +82,10 @@ #define PCIE_CLIENT_LTSSM_STATUS 0x300 #define PCIE_CLIENT_INTR_MASK 0x24 #define PCIE_LTSSM_ENABLE_ENHANCE BIT(4) +#define PCIE_CLIENT_MSI_GEN_CON 0x38 + +#define PCIe_CLIENT_MSI_OBJ_IRQ 0 /* rockchip ep object special irq */ + #define PCIE_ELBI_REG_NUM 0x2 #define PCIE_ELBI_LOCAL_BASE 0x200e00 @@ -99,6 +103,8 @@ #define PCIE_DBI_SIZE 0x400000 +#define PCIE_EP_OBJ_INFO_DRV_VERSION 0x00000001 + struct rockchip_pcie { struct dw_pcie pci; void __iomem *apb_base; @@ -120,6 +126,8 @@ struct rockchip_pcie { struct dma_trx_obj *dma_obj; struct fasync_struct *async; phys_addr_t dbi_base_physical; + struct pcie_ep_obj_info *obj_info; + enum pcie_ep_mmap_resource cur_mmap_res; }; struct rockchip_pcie_misc_dev { @@ -267,6 +275,10 @@ static int rockchip_pcie_resource_get(struct platform_device *pdev, rockchip->ib_target_size = resource_size(&reg); rockchip->ib_target_base = rockchip_pcie_map_kernel(reg.start, resource_size(&reg)); + rockchip->obj_info = (struct pcie_ep_obj_info 
*)rockchip->ib_target_base; + memset_io(rockchip->obj_info, 0, sizeof(struct pcie_ep_obj_info)); + rockchip->obj_info->magic = PCIE_EP_OBJ_INFO_MAGIC; + rockchip->obj_info->version = PCIE_EP_OBJ_INFO_DRV_VERSION; return 0; } @@ -570,55 +582,64 @@ static u8 rockchip_pcie_iatu_unroll_enabled(struct dw_pcie *pci) static void rockchip_pcie_local_elbi_enable(struct rockchip_pcie *rockchip) { int i; - u32 dlbi_reg; + u32 elbi_reg; struct dw_pcie *pci = &rockchip->pci; for (i = 0; i < PCIE_ELBI_REG_NUM; i++) { - dlbi_reg = PCIE_ELBI_LOCAL_BASE + PCIE_ELBI_LOCAL_ENABLE_OFF + + elbi_reg = PCIE_ELBI_LOCAL_BASE + PCIE_ELBI_LOCAL_ENABLE_OFF + i * 4; - dw_pcie_writel_dbi(pci, dlbi_reg, 0xffff0000); + dw_pcie_writel_dbi(pci, elbi_reg, 0xffff0000); } } static void rockchip_pcie_elbi_clear(struct rockchip_pcie *rockchip) { int i; - u32 dlbi_reg; + u32 elbi_reg; struct dw_pcie *pci = &rockchip->pci; u32 val; for (i = 0; i < PCIE_ELBI_REG_NUM; i++) { - dlbi_reg = PCIE_ELBI_LOCAL_BASE + i * 4; - val = dw_pcie_readl_dbi(pci, dlbi_reg); + elbi_reg = PCIE_ELBI_LOCAL_BASE + i * 4; + val = dw_pcie_readl_dbi(pci, elbi_reg); val <<= 16; - dw_pcie_writel_dbi(pci, dlbi_reg, val); + dw_pcie_writel_dbi(pci, elbi_reg, val); } } +static void rockchip_pcie_raise_msi_irq(struct rockchip_pcie *rockchip, u8 interrupt_num) +{ + rockchip_pcie_writel_apb(rockchip, BIT(interrupt_num), PCIE_CLIENT_MSI_GEN_CON); +} + static irqreturn_t rockchip_pcie_sys_irq_handler(int irq, void *arg) { struct rockchip_pcie *rockchip = arg; struct dw_pcie *pci = &rockchip->pci; - u32 dlbi_reg; + u32 elbi_reg; u32 chn; - union int_status status; + union int_status wr_status, rd_status; union int_clear clears; u32 reg, val, mask; + bool sigio = false; /* ELBI helper, only check the valid bits, and discard the rest interrupts */ - dlbi_reg = dw_pcie_readl_dbi(pci, PCIE_ELBI_LOCAL_BASE + PCIE_ELBI_APP_ELBI_INT_GEN0); - if (dlbi_reg & PCIE_ELBI_APP_ELBI_INT_GEN0_SIGIO) { - dev_dbg(rockchip->pci.dev, "SIGIO\n"); - 
kill_fasync(&rockchip->async, SIGIO, POLL_IN); + elbi_reg = dw_pcie_readl_dbi(pci, PCIE_ELBI_LOCAL_BASE + PCIE_ELBI_APP_ELBI_INT_GEN0); + if (elbi_reg & PCIE_ELBI_APP_ELBI_INT_GEN0_SIGIO) { + sigio = true; + rockchip->obj_info->irq_type_ep = OBJ_IRQ_ELBI; + rockchip_pcie_elbi_clear(rockchip); + goto out; } - rockchip_pcie_elbi_clear(rockchip); - /* DMA helper */ mask = dw_pcie_readl_dbi(pci, PCIE_DMA_OFFSET + PCIE_DMA_WR_INT_MASK); - status.asdword = dw_pcie_readl_dbi(pci, PCIE_DMA_OFFSET + PCIE_DMA_WR_INT_STATUS) & (~mask); + wr_status.asdword = dw_pcie_readl_dbi(pci, PCIE_DMA_OFFSET + PCIE_DMA_WR_INT_STATUS) & (~mask); + mask = dw_pcie_readl_dbi(pci, PCIE_DMA_OFFSET + PCIE_DMA_RD_INT_MASK); + rd_status.asdword = dw_pcie_readl_dbi(pci, PCIE_DMA_OFFSET + PCIE_DMA_RD_INT_STATUS) & (~mask); + for (chn = 0; chn < PCIE_DMA_CHANEL_MAX_NUM; chn++) { - if (status.donesta & BIT(chn)) { + if (wr_status.donesta & BIT(chn)) { clears.doneclr = BIT(chn); dw_pcie_writel_dbi(pci, PCIE_DMA_OFFSET + PCIE_DMA_WR_INT_CLEAR, clears.asdword); @@ -626,7 +647,7 @@ static irqreturn_t rockchip_pcie_sys_irq_handler(int irq, void *arg) rockchip->dma_obj->cb(rockchip->dma_obj, chn, DMA_TO_BUS); } - if (status.abortsta & BIT(chn)) { + if (wr_status.abortsta & BIT(chn)) { dev_err(pci->dev, "%s, abort\n", __func__); clears.abortclr = BIT(chn); dw_pcie_writel_dbi(pci, PCIE_DMA_OFFSET + @@ -634,10 +655,8 @@ static irqreturn_t rockchip_pcie_sys_irq_handler(int irq, void *arg) } } - mask = dw_pcie_readl_dbi(pci, PCIE_DMA_OFFSET + PCIE_DMA_RD_INT_MASK); - status.asdword = dw_pcie_readl_dbi(pci, PCIE_DMA_OFFSET + PCIE_DMA_RD_INT_STATUS) & (~mask); for (chn = 0; chn < PCIE_DMA_CHANEL_MAX_NUM; chn++) { - if (status.donesta & BIT(chn)) { + if (rd_status.donesta & BIT(chn)) { clears.doneclr = BIT(chn); dw_pcie_writel_dbi(pci, PCIE_DMA_OFFSET + PCIE_DMA_RD_INT_CLEAR, clears.asdword); @@ -645,7 +664,7 @@ static irqreturn_t rockchip_pcie_sys_irq_handler(int irq, void *arg) 
rockchip->dma_obj->cb(rockchip->dma_obj, chn, DMA_FROM_BUS); } - if (status.abortsta & BIT(chn)) { + if (rd_status.abortsta & BIT(chn)) { dev_err(pci->dev, "%s, abort\n", __func__); clears.abortclr = BIT(chn); dw_pcie_writel_dbi(pci, PCIE_DMA_OFFSET + @@ -653,6 +672,24 @@ static irqreturn_t rockchip_pcie_sys_irq_handler(int irq, void *arg) } } + if (wr_status.asdword || rd_status.asdword) { + rockchip->obj_info->irq_type_rc = OBJ_IRQ_DMA; + rockchip->obj_info->dma_status_rc.wr |= wr_status.asdword; + rockchip->obj_info->dma_status_rc.rd |= rd_status.asdword; + rockchip_pcie_raise_msi_irq(rockchip, PCIe_CLIENT_MSI_OBJ_IRQ); + + rockchip->obj_info->irq_type_ep = OBJ_IRQ_DMA; + rockchip->obj_info->dma_status_ep.wr |= wr_status.asdword; + rockchip->obj_info->dma_status_ep.rd |= rd_status.asdword; + sigio = true; + } + +out: + if (sigio) { + dev_dbg(rockchip->pci.dev, "SIGIO\n"); + kill_fasync(&rockchip->async, SIGIO, POLL_IN); + } + reg = rockchip_pcie_readl_apb(rockchip, PCIE_CLIENT_INTR_STATUS_MISC); if (reg & BIT(2)) { /* Setup command register */ @@ -896,6 +933,7 @@ static long pcie_ep_ioctl(struct file *file, unsigned int cmd, unsigned long arg struct pcie_ep_dma_cache_cfg cfg; void __user *uarg = (void __user *)arg; int i, ret; + enum pcie_ep_mmap_resource mmap_res; switch (cmd) { case PCIE_DMA_GET_ELBI_DATA: @@ -934,6 +972,33 @@ static long pcie_ep_ioctl(struct file *file, unsigned int cmd, unsigned long arg dw_pcie_writel_dbi(&rockchip->pci, PCIE_DMA_OFFSET + PCIE_DMA_RD_INT_MASK, 0xffffffff); break; + case PCIE_DMA_RAISE_MSI_OBJ_IRQ_USER: + rockchip->obj_info->irq_type_rc = OBJ_IRQ_USER; + rockchip_pcie_raise_msi_irq(rockchip, PCIe_CLIENT_MSI_OBJ_IRQ); + break; + case PCIE_EP_GET_USER_INFO: + memset(&msg, 0, sizeof(msg)); /* no stale stack bytes to userspace */ + msg.bar0_phys_addr = rockchip->ib_target_address; + ret = copy_to_user(uarg, &msg, sizeof(msg)); + if (ret) { + dev_err(rockchip->pci.dev, "failed to get user info\n"); + return -EFAULT; + } + break; + case PCIE_EP_SET_MMAP_RESOURCE: + ret = copy_from_user(&mmap_res, 
uarg, sizeof(mmap_res)); + if (ret) { + dev_err(rockchip->pci.dev, "failed to copy from user\n"); + return -EFAULT; + } + + if (mmap_res >= PCIE_EP_MMAP_RESOURCE_MAX) { + dev_err(rockchip->pci.dev, "mmap index %d is out of number\n", mmap_res); + return -EINVAL; + } + + rockchip->cur_mmap_res = mmap_res; + break; default: break; } @@ -945,9 +1010,25 @@ static int pcie_ep_mmap(struct file *file, struct vm_area_struct *vma) struct rockchip_pcie *rockchip = (struct rockchip_pcie *)file->private_data; size_t size = vma->vm_end - vma->vm_start; int err; + phys_addr_t addr; - if (size > PCIE_DBI_SIZE) { - dev_warn(rockchip->pci.dev, "mmap size is out of limitation\n"); + switch (rockchip->cur_mmap_res) { + case PCIE_EP_MMAP_RESOURCE_DBI: + if (size > PCIE_DBI_SIZE) { + dev_warn(rockchip->pci.dev, "dbi mmap size is out of limitation\n"); + return -EINVAL; + } + addr = rockchip->dbi_base_physical; + break; + case PCIE_EP_MMAP_RESOURCE_BAR0: + if (size > rockchip->ib_target_size) { + dev_warn(rockchip->pci.dev, "bar0 mmap size is out of limitation\n"); + return -EINVAL; + } + addr = rockchip->ib_target_address; + break; + default: + dev_err(rockchip->pci.dev, "cur mmap_res %d is unsupported\n", rockchip->cur_mmap_res); return -EINVAL; } @@ -956,7 +1037,7 @@ static int pcie_ep_mmap(struct file *file, struct vm_area_struct *vma) vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); err = remap_pfn_range(vma, vma->vm_start, - __phys_to_pfn(rockchip->dbi_base_physical), + __phys_to_pfn(addr), size, vma->vm_page_prot); if (err) return -EAGAIN; From a7c40cb119703e566d9d5befb8c1a7b0533dd7b7 Mon Sep 17 00:00:00 2001 From: Jon Lin Date: Tue, 17 Jan 2023 17:46:48 +0800 Subject: [PATCH 61/79] PCI: rockchip: dw-dmatest: Support rc dma 1.Set rc dma as default 2.Change to ep dma by sending command: echo 0 > ./sys/module/pcie_dw_dmatest/parameters/is_rc Change-Id: I9b16c328c08f220772e487c7c796b8898d74ae10 Signed-off-by: Jon Lin --- drivers/pci/controller/dwc/pcie-dw-dmatest.c | 34 
++++++++++++++++---- 1 file changed, 28 insertions(+), 6 deletions(-) diff --git a/drivers/pci/controller/dwc/pcie-dw-dmatest.c b/drivers/pci/controller/dwc/pcie-dw-dmatest.c index edc9b68eb589..4b0d4d3e01c8 100644 --- a/drivers/pci/controller/dwc/pcie-dw-dmatest.c +++ b/drivers/pci/controller/dwc/pcie-dw-dmatest.c @@ -47,6 +47,10 @@ static unsigned int test_dev; module_param(test_dev, uint, 0644); MODULE_PARM_DESC(test_dev, "Choose dma_obj device,(default 0)"); +static bool is_rc = true; +module_param_named(is_rc, is_rc, bool, 0644); +MODULE_PARM_DESC(is_rc, "Test port is rc(default true)"); + #define PCIE_DW_MISC_DMATEST_DEV_MAX 5 #define PCIE_DMA_CHANEL_MAX_NUM 2 @@ -90,8 +94,8 @@ static int rk_pcie_dma_wait_for_finised(struct dma_trx_obj *obj, struct dma_tabl return ret; } -static int rk_pcie_dma_frombus(struct pcie_dw_dmatest_dev *dmatest_dev, u32 chn, - u32 local_paddr, u32 bus_paddr, u32 size) +static int rk_pcie_ep_dma_frombus(struct pcie_dw_dmatest_dev *dmatest_dev, u32 chn, + u32 local_paddr, u32 bus_paddr, u32 size) { struct dma_table *table; struct dma_trx_obj *obj = dmatest_dev->obj; @@ -133,8 +137,8 @@ static int rk_pcie_dma_frombus(struct pcie_dw_dmatest_dev *dmatest_dev, u32 chn, return ret; } -static int rk_pcie_dma_tobus(struct pcie_dw_dmatest_dev *dmatest_dev, u32 chn, - u32 bus_paddr, u32 local_paddr, u32 size) +static int rk_pcie_ep_dma_tobus(struct pcie_dw_dmatest_dev *dmatest_dev, u32 chn, + u32 bus_paddr, u32 local_paddr, u32 size) { struct dma_table *table; struct dma_trx_obj *obj = dmatest_dev->obj; @@ -176,6 +180,18 @@ static int rk_pcie_dma_tobus(struct pcie_dw_dmatest_dev *dmatest_dev, u32 chn, return ret; } +static int rk_pcie_rc_dma_frombus(struct pcie_dw_dmatest_dev *dmatest_dev, u32 chn, + u32 local_paddr, u32 bus_paddr, u32 size) +{ + return rk_pcie_ep_dma_tobus(dmatest_dev, chn, local_paddr, bus_paddr, size); +} + +static int rk_pcie_rc_dma_tobus(struct pcie_dw_dmatest_dev *dmatest_dev, u32 chn, + u32 bus_paddr, u32 local_paddr, 
u32 size) +{ + return rk_pcie_ep_dma_frombus(dmatest_dev, chn, bus_paddr, local_paddr, size); +} + static int rk_pcie_dma_interrupt_handler_call_back(struct dma_trx_obj *obj, u32 chn, enum dma_dir dir) { struct pcie_dw_dmatest_dev *dmatest_dev = (struct pcie_dw_dmatest_dev *)obj->priv; @@ -243,13 +259,19 @@ static int dma_test(struct pcie_dw_dmatest_dev *dmatest_dev, u32 chn, start_time = ktime_get(); for (i = 0; i < loop; i++) { if (rd_en) { - rk_pcie_dma_frombus(dmatest_dev, chn, local_paddr, bus_paddr, size); + if (is_rc) + rk_pcie_rc_dma_frombus(dmatest_dev, chn, local_paddr, bus_paddr, size); + else + rk_pcie_ep_dma_frombus(dmatest_dev, chn, local_paddr, bus_paddr, size); dma_sync_single_for_cpu(obj->dev, local_paddr, size, DMA_FROM_DEVICE); } if (wr_en) { dma_sync_single_for_device(obj->dev, local_paddr, size, DMA_TO_DEVICE); - rk_pcie_dma_tobus(dmatest_dev, chn, bus_paddr, local_paddr, size); + if (is_rc) + rk_pcie_rc_dma_tobus(dmatest_dev, chn, bus_paddr, local_paddr, size); + else + rk_pcie_ep_dma_tobus(dmatest_dev, chn, bus_paddr, local_paddr, size); } } end_time = ktime_get(); From 6811c647e8ee7e8b6e0c7d560d50a9cab54ecb7c Mon Sep 17 00:00:00 2001 From: Tao Huang Date: Tue, 21 Mar 2023 10:23:16 +0800 Subject: [PATCH 62/79] dma-buf: destructor only with CONFIG_DMABUF_CACHE Destructor is only used by dma-buf cache. So replace CONFIG_NO_GKI with CONFIG_DMABUF_CACHE. 
Fixes: 345084a2ede2 ("BACKPORT: ANDROID: GKI: dma-buf: Add support to set a destructor on a dma-buf") Signed-off-by: Tao Huang Change-Id: I08c1a10e8e74c37fbe9677115ca421425e4cdcd4 --- drivers/dma-buf/dma-buf.c | 4 ++-- include/linux/dma-buf.h | 8 +++----- 2 files changed, 5 insertions(+), 7 deletions(-) diff --git a/drivers/dma-buf/dma-buf.c b/drivers/dma-buf/dma-buf.c index 771be95730da..ad7ddaa35b3e 100644 --- a/drivers/dma-buf/dma-buf.c +++ b/drivers/dma-buf/dma-buf.c @@ -119,7 +119,7 @@ static char *dmabuffs_dname(struct dentry *dentry, char *buffer, int buflen) static void dma_buf_release(struct dentry *dentry) { struct dma_buf *dmabuf; -#ifdef CONFIG_NO_GKI +#ifdef CONFIG_DMABUF_CACHE int dtor_ret = 0; #endif @@ -140,7 +140,7 @@ static void dma_buf_release(struct dentry *dentry) BUG_ON(dmabuf->cb_shared.active || dmabuf->cb_excl.active); dma_buf_stats_teardown(dmabuf); -#ifdef CONFIG_NO_GKI +#ifdef CONFIG_DMABUF_CACHE if (dmabuf->dtor) dtor_ret = dmabuf->dtor(dmabuf, dmabuf->dtor_data); diff --git a/include/linux/dma-buf.h b/include/linux/dma-buf.h index 169e4ece0052..0b3c0643bc7f 100644 --- a/include/linux/dma-buf.h +++ b/include/linux/dma-buf.h @@ -359,7 +359,7 @@ struct dma_buf_ops { ANDROID_KABI_RESERVE(2); }; -#ifdef CONFIG_NO_GKI +#ifdef CONFIG_DMABUF_CACHE /** * dma_buf_destructor - dma-buf destructor function * @dmabuf: [in] pointer to dma-buf @@ -439,12 +439,10 @@ struct dma_buf { struct dma_buf *dmabuf; } *sysfs_entry; #endif -#ifdef CONFIG_NO_GKI +#ifdef CONFIG_DMABUF_CACHE dma_buf_destructor dtor; void *dtor_data; -#ifdef CONFIG_DMABUF_CACHE struct mutex cache_lock; -#endif #endif ANDROID_KABI_RESERVE(1); @@ -648,7 +646,7 @@ long dma_buf_set_name(struct dma_buf *dmabuf, const char *name); int dma_buf_get_flags(struct dma_buf *dmabuf, unsigned long *flags); int dma_buf_get_uuid(struct dma_buf *dmabuf, uuid_t *uuid); -#ifdef CONFIG_NO_GKI +#ifdef CONFIG_DMABUF_CACHE /** * dma_buf_set_destructor - set the dma-buf's destructor * @dmabuf: [in] pointer 
to dma-buf From 9cadc52d787d50610fde38755867f1e1628c3bc8 Mon Sep 17 00:00:00 2001 From: Guochun Huang Date: Tue, 21 Mar 2023 07:53:49 +0000 Subject: [PATCH 63/79] Revert "drm/rockchip: dsi2: set escape clk 10MHz default" solve no response when read back from panel in video mode display This reverts commit 28eb6929be0a0f572324d31b06aa875503bacf22. Change-Id: Ic42637d7714169fafa22771185e11c2ddcfa5c95 Signed-off-by: Guochun Huang --- drivers/gpu/drm/rockchip/dw-mipi-dsi2-rockchip.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/rockchip/dw-mipi-dsi2-rockchip.c b/drivers/gpu/drm/rockchip/dw-mipi-dsi2-rockchip.c index c3bcd60b17ce..145805a45494 100644 --- a/drivers/gpu/drm/rockchip/dw-mipi-dsi2-rockchip.c +++ b/drivers/gpu/drm/rockchip/dw-mipi-dsi2-rockchip.c @@ -592,9 +592,9 @@ static void dw_mipi_dsi2_phy_clk_mode_cfg(struct dw_mipi_dsi2 *dsi2) */ val |= NON_CONTINUOUS_CLK; - /* The Escape clock ranges from 1MHz to 20MHz. */ + /* The maximum value of the escape clock frequency is 20MHz */ sys_clk = clk_get_rate(dsi2->sys_clk) / USEC_PER_SEC; - esc_clk_div = DIV_ROUND_UP(sys_clk, 10 * 2); + esc_clk_div = DIV_ROUND_UP(sys_clk, 20 * 2); val |= PHY_LPTX_CLK_DIV(esc_clk_div); regmap_write(dsi2->regmap, DSI2_PHY_CLK_CFG, val); From e81c289e86218a0521de10f004bd819bc88d680f Mon Sep 17 00:00:00 2001 From: Huang zhibao Date: Sat, 18 Mar 2023 15:52:02 +0800 Subject: [PATCH 64/79] arm64: dts: rockchip: rk3588-pcie-ep-demo: fix i2c port error Signed-off-by: Huang zhibao Change-Id: I8a3761983488d29e01c4353894d99e0d746ec280 --- .../dts/rockchip/rk3588-pcie-ep-demo.dtsi | 24 ++++++++++--------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/arch/arm64/boot/dts/rockchip/rk3588-pcie-ep-demo.dtsi b/arch/arm64/boot/dts/rockchip/rk3588-pcie-ep-demo.dtsi index 3fc531fe548f..dda90610e10a 100644 --- a/arch/arm64/boot/dts/rockchip/rk3588-pcie-ep-demo.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3588-pcie-ep-demo.dtsi @@ -102,16 +102,6 @@ 
pinctrl-0 = <&typec5v_pwren>; }; - vcc_1v1_nldo_s3: vcc-1v1-nldo-s3 { - compatible = "regulator-fixed"; - regulator-name = "vcc_1v1_nldo_s3"; - regulator-always-on; - regulator-boot-on; - regulator-min-microvolt = <1100000>; - regulator-max-microvolt = <1100000>; - vin-supply = <&vcc5v0_sys>; - }; - vcc12v_dcin: vcc12v-dcin { compatible = "regulator-fixed"; regulator-name = "vcc12v_dcin"; @@ -140,6 +130,16 @@ regulator-max-microvolt = <5000000>; vin-supply = <&vcc12v_dcin>; }; + + vcc_1v1_nldo_s3: vcc-1v1-nldo-s3 { + compatible = "regulator-fixed"; + regulator-name = "vcc_1v1_nldo_s3"; + regulator-always-on; + regulator-boot-on; + regulator-min-microvolt = <1100000>; + regulator-max-microvolt = <1100000>; + vin-supply = <&vcc5v0_sys>; + }; }; &av1d_mmu { @@ -267,8 +267,10 @@ }; }; -&i2c2 { +&i2c1 { status = "okay"; + pinctrl-names = "default"; + pinctrl-0 = <&i2c1m2_xfer>; vdd_npu_s0: vdd_npu_mem_s0: rk8602@42 { compatible = "rockchip,rk8602"; From 15670ab7025533c74667fdfb50813ad669839081 Mon Sep 17 00:00:00 2001 From: Algea Cao Date: Mon, 20 Mar 2023 16:21:26 +0800 Subject: [PATCH 65/79] drm/rockchip: dw_hdmi: Set default color base on previous color Fixed color anomalies caused by plug when playing hdr videos. 
Signed-off-by: Algea Cao Change-Id: I487ba996add98e5d474149bae3552fe0e8371ff2 --- drivers/gpu/drm/rockchip/dw_hdmi-rockchip.c | 34 +++++++++++++++++++-- 1 file changed, 32 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/rockchip/dw_hdmi-rockchip.c b/drivers/gpu/drm/rockchip/dw_hdmi-rockchip.c index d7bc8ec86a6d..d883a73cfdce 100644 --- a/drivers/gpu/drm/rockchip/dw_hdmi-rockchip.c +++ b/drivers/gpu/drm/rockchip/dw_hdmi-rockchip.c @@ -762,6 +762,35 @@ static int hdmi_bus_fmt_color_depth(unsigned int bus_format) } } +static int hdmi_bus_fmt_to_color_format(unsigned int bus_format) +{ + switch (bus_format) { + case MEDIA_BUS_FMT_UYYVYY8_0_5X24: + case MEDIA_BUS_FMT_UYYVYY10_0_5X30: + case MEDIA_BUS_FMT_UYYVYY12_0_5X36: + case MEDIA_BUS_FMT_UYYVYY16_0_5X48: + return RK_IF_FORMAT_YCBCR420; + + case MEDIA_BUS_FMT_YUV8_1X24: + case MEDIA_BUS_FMT_YUV10_1X30: + case MEDIA_BUS_FMT_YUV12_1X36: + case MEDIA_BUS_FMT_YUV16_1X48: + return RK_IF_FORMAT_YCBCR444; + + case MEDIA_BUS_FMT_UYVY8_1X16: + case MEDIA_BUS_FMT_UYVY10_1X20: + case MEDIA_BUS_FMT_UYVY12_1X24: + return RK_IF_FORMAT_YCBCR422; + + case MEDIA_BUS_FMT_RGB888_1X24: + case MEDIA_BUS_FMT_RGB101010_1X30: + case MEDIA_BUS_FMT_RGB121212_1X36: + case MEDIA_BUS_FMT_RGB161616_1X48: + default: + return RK_IF_FORMAT_RGB; + } +} + static unsigned int hdmi_get_tmdsclock(struct rockchip_hdmi *hdmi, unsigned long pixelclock) { @@ -2005,8 +2034,9 @@ dw_hdmi_rockchip_select_output(struct drm_connector_state *conn_state, *enc_out_encoding = V4L2_YCBCR_ENC_709; } - if ((yuv422_out || hdmi->hdmi_output == RK_IF_FORMAT_YCBCR_HQ) && - color_depth == 10 && hdmi_bus_fmt_color_depth(hdmi->prev_bus_format) == 8) { + if ((yuv422_out || hdmi->hdmi_output == RK_IF_FORMAT_YCBCR_HQ) && color_depth == 10 && + (hdmi_bus_fmt_color_depth(hdmi->prev_bus_format) == 8 || + hdmi_bus_fmt_to_color_format(hdmi->prev_bus_format) == RK_IF_FORMAT_YCBCR422)) { /* We prefer use YCbCr422 to send hdr 10bit */ if (info->color_formats & 
DRM_COLOR_FORMAT_YCRCB422) *color_format = RK_IF_FORMAT_YCBCR422; From 57061ad7b6b2139e17dd012e24b70eb11779bc1e Mon Sep 17 00:00:00 2001 From: Shiqin Chen Date: Wed, 22 Mar 2023 10:45:09 +0800 Subject: [PATCH 66/79] arm64: dts: rockchip: rk3588-toybrick-x0: Enable pwm fan Signed-off-by: Shiqin Chen Change-Id: I537275a2d35fb28181a3f7512bc11ce01510112e --- .../boot/dts/rockchip/rk3588-toybrick-x0.dtsi | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/arch/arm64/boot/dts/rockchip/rk3588-toybrick-x0.dtsi b/arch/arm64/boot/dts/rockchip/rk3588-toybrick-x0.dtsi index 812f7b113cf5..b404fd1398f9 100644 --- a/arch/arm64/boot/dts/rockchip/rk3588-toybrick-x0.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3588-toybrick-x0.dtsi @@ -25,6 +25,20 @@ }; }; + fan: pwm-fan { + compatible = "pwm-fan"; + #cooling-cells = <2>; + pwms = <&pwm9 0 50000 0>; + cooling-levels = <0 50 100 150 200 255>; + rockchip,temp-trips = < + 50000 1 + 55000 2 + 60000 3 + 65000 4 + 70000 5 + >; + }; + hdmiin_dc: hdmiin-dc { compatible = "rockchip,dummy-codec"; #sound-dai-cells = <0>; @@ -666,6 +680,11 @@ status = "okay"; }; +&pwm9 { + pinctrl-0 = <&pwm9m1_pins>; + status = "okay"; +}; + &sata0 { status = "okay"; }; From 69059cd5f5986900e24119eb4fdc7b4c7c0e28c3 Mon Sep 17 00:00:00 2001 From: Jon Lin Date: Tue, 21 Mar 2023 15:41:17 +0800 Subject: [PATCH 67/79] drivers: rkflash: Fixes compiler warnings drivers/rkflash/nandc.c:393:31: warning: equality comparison with extraneous parentheses [-Wparentheses-equality] error, forbidden warning:nandc.c:393 } while ((fl_reg.V6.tr_rdy == 0)); ~~~~~~~~~~~~~~~~~^~~~ drivers/rkflash/nandc.c:393:31: note: remove extraneous parentheses around the comparison to silence this warning } while ((fl_reg.V6.tr_rdy == 0)); ~ ^ ~ drivers/rkflash/nandc.c:393:31: note: use '=' to turn this equality comparison into an assignment } while ((fl_reg.V6.tr_rdy == 0)); Change-Id: Ib4c422216c61191b924163fedfe1b83f1fd297df Signed-off-by: Jon Lin --- drivers/rkflash/nandc.c | 
6 +- drivers/rkflash/rk_sftl_arm_v8_clang.S | 1995 +++++------------------- drivers/rkflash/rkflash_api.h | 2 +- drivers/rkflash/sfc_nand_boot.c | 5 +- 4 files changed, 355 insertions(+), 1653 deletions(-) diff --git a/drivers/rkflash/nandc.c b/drivers/rkflash/nandc.c index efeeede068b9..2ac3d69b1551 100644 --- a/drivers/rkflash/nandc.c +++ b/drivers/rkflash/nandc.c @@ -360,7 +360,7 @@ static void nandc_xfer_done(void) stat_reg.d32 = nandc_readl(NANDC_MTRANS_STAT); usleep_range(20, 30); } while (stat_reg.V6.mtrans_cnt < fl_reg.V6.page_num || - fl_reg.V6.tr_rdy == 0); + !fl_reg.V6.tr_rdy); if (master.mapped) { rknandc_dma_unmap_single( @@ -376,7 +376,7 @@ static void nandc_xfer_done(void) do { fl_reg.d32 = nandc_readl(NANDC_FLCTL); usleep_range(20, 30); - } while (fl_reg.V6.tr_rdy == 0); + } while (!fl_reg.V6.tr_rdy); if (master.mapped) { rknandc_dma_unmap_single( (unsigned long)(master.page_phy), @@ -390,7 +390,7 @@ static void nandc_xfer_done(void) } else { do { fl_reg.d32 = nandc_readl(NANDC_FLCTL); - } while ((fl_reg.V6.tr_rdy == 0)); + } while ((!fl_reg.V6.tr_rdy)); } } } diff --git a/drivers/rkflash/rk_sftl_arm_v8_clang.S b/drivers/rkflash/rk_sftl_arm_v8_clang.S index 335ad0db1f3d..864822c991c3 100644 --- a/drivers/rkflash/rk_sftl_arm_v8_clang.S +++ b/drivers/rkflash/rk_sftl_arm_v8_clang.S @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 */ /* Copyright (c) 2018-2021 Rockchip Electronics Co. Ltd. 
- * date: 2022-08-15 + * date: 2023-03-21 */ .text .section .note.gnu.property,"a",@note @@ -4748,7 +4748,7 @@ FtlVpcTblFlush: // @FtlVpcTblFlush mov x9, #19539 movk x9, #18004, lsl #16 adrp x20, g_sys_save_data - movk x9, #87, lsl #32 + movk x9, #89, lsl #32 add x20, x20, :lo12:g_sys_save_data movk x9, #20480, lsl #48 adrp x10, c_ftl_nand_die_num @@ -7014,7 +7014,7 @@ FtlEctTblFlush: // @FtlEctTblFlush ldr w10, [x11, :lo12:g_totle_gc_page_count] ldr w11, [x12, :lo12:g_totle_write_page_count] ldr w12, [x13, :lo12:g_totle_read_page_count] - mov w8, #87 + mov w8, #89 movk w8, #20480, lsl #16 adrp x14, g_totle_l2p_write_count adrp x15, g_totle_slc_erase_count @@ -8161,12 +8161,12 @@ FtlProgPages: // @FtlProgPages mov x20, x0 str x8, [sp, #8] bl FlashProgPages - adrp x26, c_ftl_nand_planes_num - cbz w21, .LBB47_15 + cbz w21, .LBB47_22 // %bb.1: mov w28, w21 adrp x21, .L.str.77 mov x27, xzr + mov w24, wzr add x21, x21, :lo12:.L.str.77 adrp x22, c_ftl_nand_blks_per_die adrp x23, c_ftl_nand_planes_per_die @@ -8181,13 +8181,13 @@ FtlProgPages: // @FtlProgPages .LBB47_4: // =>This Loop Header: Depth=1 // Child Loop BB47_7 Depth 2 add x25, x20, x27, lsl #5 - add x24, x25, #4 // =4 + add x26, x25, #4 // =4 b .LBB47_7 .LBB47_5: // in Loop: Header=BB47_7 Depth=2 - ldr w1, [x24] + ldr w1, [x26] mov x0, x21 bl sftl_printk - ldr w8, [x24] + ldr w8, [x26] ldrh w9, [x22, :lo12:c_ftl_nand_blks_per_die] ldrh w10, [x23, :lo12:c_ftl_nand_planes_per_die] ubfx w8, w8, #10, #16 @@ -8200,10 +8200,11 @@ FtlProgPages: // @FtlProgPages .LBB47_6: // in Loop: Header=BB47_7 Depth=2 mov x0, x19 bl get_new_active_ppa - str w0, [x24] + str w0, [x26] mov w1, #1 mov x0, x25 mov w2, wzr + mov w24, #1 bl FlashProgPages .LBB47_7: // Parent Loop BB47_4 Depth=1 // => This Inner Loop Header: Depth=2 @@ -8219,26 +8220,27 @@ FtlProgPages: // @FtlProgPages bl allocate_new_data_superblock b .LBB47_6 .LBB47_10: // in Loop: Header=BB47_4 Depth=1 + adrp x9, c_ftl_nand_planes_num ldrb w8, [x19, #6] - ldrh w9, [x26, 
:lo12:c_ftl_nand_planes_num] + ldrh w9, [x9, :lo12:c_ftl_nand_planes_num] cmp w9, w8 b.hi .LBB47_12 // %bb.11: // in Loop: Header=BB47_4 Depth=1 adrp x0, .L.str.78 adrp x1, .L__func__.FtlProgPages - mov w2, #985 + mov w2, #986 add x0, x0, :lo12:.L.str.78 add x1, x1, :lo12:.L__func__.FtlProgPages bl sftl_printk .LBB47_12: // in Loop: Header=BB47_4 Depth=1 - ldr w8, [x24] - add x24, x20, x27, lsl #5 + ldr w8, [x26] + add x25, x20, x27, lsl #5 add x1, sp, #4 // =4 mov w2, #1 str w8, [sp, #4] - ldr w0, [x24, #24] + ldr w0, [x25, #24] bl log2phys - ldr x8, [x24, #16] + ldr x8, [x25, #16] ldr w8, [x8, #12] cmn w8, #1 // =1 b.eq .LBB47_3 @@ -8261,24 +8263,65 @@ FtlProgPages: // @FtlProgPages bl sftl_printk b .LBB47_2 .LBB47_15: - ldrb w8, [x19, #6] - ldrh w9, [x26, :lo12:c_ftl_nand_planes_num] - cmp w9, w8 - b.hi .LBB47_17 + cbz w24, .LBB47_22 // %bb.16: + bl FtlWriteDump_data + adrp x21, c_ftl_nand_l2pmap_ram_region_num + ldrh w8, [x21, :lo12:c_ftl_nand_l2pmap_ram_region_num] + cbz w8, .LBB47_21 +// %bb.17: + adrp x20, gL2pMapInfo + mov x22, xzr + mov x23, xzr + adrp x24, p_l2p_ram_map + add x20, x20, :lo12:gL2pMapInfo + b .LBB47_19 +.LBB47_18: // in Loop: Header=BB47_19 Depth=1 + add x23, x23, #1 // =1 + cmp x23, w8, uxth + add x22, x22, #16 // =16 + b.hs .LBB47_21 +.LBB47_19: // =>This Inner Loop Header: Depth=1 + ldr x9, [x24, :lo12:p_l2p_ram_map] + add x9, x9, x22 + ldr w10, [x9, #4] + tbz w10, #31, .LBB47_18 +// %bb.20: // in Loop: Header=BB47_19 Depth=1 + ldrh w1, [x9] + ldr x2, [x9, #8] + mov x0, x20 + bl FtlMapWritePage + ldr x8, [x24, :lo12:p_l2p_ram_map] + add x8, x8, x22 + ldr w9, [x8, #4] + and w9, w9, #0x7fffffff + str w9, [x8, #4] + ldrh w8, [x21, :lo12:c_ftl_nand_l2pmap_ram_region_num] + b .LBB47_18 +.LBB47_21: + mov w0, #1 + bl FtlEctTblFlush + bl FtlVpcTblFlush +.LBB47_22: + adrp x9, c_ftl_nand_planes_num + ldrb w8, [x19, #6] + ldrh w9, [x9, :lo12:c_ftl_nand_planes_num] + cmp w9, w8 + b.hi .LBB47_24 +// %bb.23: adrp x0, .L.str.78 adrp x1, 
.L__func__.FtlProgPages add x0, x0, :lo12:.L.str.78 add x1, x1, :lo12:.L__func__.FtlProgPages - mov w2, #1000 + mov w2, #1005 bl sftl_printk -.LBB47_17: +.LBB47_24: adrp x9, __stack_chk_guard ldr x8, [sp, #8] ldr x9, [x9, :lo12:__stack_chk_guard] cmp x9, x8 - b.ne .LBB47_19 -// %bb.18: + b.ne .LBB47_26 +// %bb.25: ldp x20, x19, [sp, #96] // 16-byte Folded Reload ldp x22, x21, [sp, #80] // 16-byte Folded Reload ldp x24, x23, [sp, #64] // 16-byte Folded Reload @@ -8288,7 +8331,7 @@ FtlProgPages: // @FtlProgPages add sp, sp, #112 // =112 hint #29 ret -.LBB47_19: +.LBB47_26: bl __stack_chk_fail .Lfunc_end47: .size FtlProgPages, .Lfunc_end47-FtlProgPages @@ -8834,7 +8877,7 @@ FtlWrite: // @FtlWrite // %bb.26: // in Loop: Header=BB51_25 Depth=1 adrp x0, .L.str.78 adrp x1, .L__func__.FtlWrite - mov w2, #1041 + mov w2, #1046 add x0, x0, :lo12:.L.str.78 add x1, x1, :lo12:.L__func__.FtlWrite bl sftl_printk @@ -8903,7 +8946,7 @@ FtlWrite: // @FtlWrite // %bb.37: // in Loop: Header=BB51_25 Depth=1 adrp x0, .L.str.78 adrp x1, .L__func__.FtlWrite - mov w2, #1074 + mov w2, #1079 add x0, x0, :lo12:.L.str.78 add x1, x1, :lo12:.L__func__.FtlWrite bl sftl_printk @@ -9083,7 +9126,7 @@ FtlWrite: // @FtlWrite // %bb.56: // in Loop: Header=BB51_41 Depth=2 adrp x0, .L.str.78 adrp x1, .L__func__.FtlWrite - mov w2, #1128 + mov w2, #1133 add x0, x0, :lo12:.L.str.78 add x1, x1, :lo12:.L__func__.FtlWrite bl sftl_printk @@ -9142,7 +9185,7 @@ FtlWrite: // @FtlWrite // %bb.64: // in Loop: Header=BB51_41 Depth=2 adrp x0, .L.str.78 adrp x1, .L__func__.FtlWrite - mov w2, #1143 + mov w2, #1148 add x0, x0, :lo12:.L.str.78 add x1, x1, :lo12:.L__func__.FtlWrite bl sftl_printk @@ -9166,7 +9209,7 @@ FtlWrite: // @FtlWrite // %bb.68: // in Loop: Header=BB51_25 Depth=1 adrp x0, .L.str.78 adrp x1, .L__func__.FtlWrite - mov w2, #1152 + mov w2, #1157 add x0, x0, :lo12:.L.str.78 add x1, x1, :lo12:.L__func__.FtlWrite bl sftl_printk @@ -9526,13 +9569,13 @@ FtlWriteToIDB: // @FtlWriteToIDB mov w0, #262144 mov w1, 
#3265 mov w2, #6 - bl kmalloc_order_trace + bl kmalloc_order adrp x25, idb_buf str x0, [x25, :lo12:idb_buf] mov w0, #262144 mov w1, #3265 mov w2, #6 - bl kmalloc_order_trace + bl kmalloc_order adrp x8, gp_flash_check_buf str x0, [x8, :lo12:gp_flash_check_buf] cbz x0, .LBB56_15 @@ -9619,9 +9662,7 @@ FtlWriteToIDB: // @FtlWriteToIDB sub w8, w8, #2, lsl #12 // =8192 add x22, x22, w8, uxtw #2 .LBB56_18: - mov w8, #64960 - movk w8, #127, lsl #16 - add w8, w23, w8 + add w8, w23, #446 // =446 cmp w24, #575 // =575 csel w8, w8, wzr, hi sub w8, w20, w8 @@ -12884,7 +12925,7 @@ Ftl_save_ext_data: // @Ftl_save_ext_data ldr w10, [x11, :lo12:g_totle_gc_page_count] ldr w11, [x12, :lo12:g_totle_write_page_count] ldr w12, [x13, :lo12:g_totle_read_page_count] - mov w8, #87 + mov w8, #89 movk w8, #20480, lsl #16 adrp x14, g_totle_l2p_write_count adrp x15, g_totle_slc_erase_count @@ -19886,7 +19927,7 @@ rk_sftl_vendor_storage_init: // @rk_sftl_vendor_storage_init stp x22, x21, [sp, #32] // 16-byte Folded Spill stp x20, x19, [sp, #48] // 16-byte Folded Spill mov x29, sp - bl kmalloc_order_trace + bl kmalloc_order adrp x20, g_vendor str x0, [x20, :lo12:g_vendor] cbz x0, .LBB136_4 @@ -20204,126 +20245,232 @@ rk_sftl_vendor_write: // @rk_sftl_vendor_write rk_sftl_vendor_storage_ioctl: // @rk_sftl_vendor_storage_ioctl // %bb.0: hint #25 - stp x29, x30, [sp, #-48]! // 16-byte Folded Spill - adrp x8, kmalloc_caches+96 - ldr x0, [x8, :lo12:kmalloc_caches+96] - str x21, [sp, #16] // 8-byte Folded Spill - stp x20, x19, [sp, #32] // 16-byte Folded Spill - mov x20, x2 + stp x29, x30, [sp, #-64]! 
// 16-byte Folded Spill + adrp x8, kmalloc_caches+320 + ldr x0, [x8, :lo12:kmalloc_caches+320] + stp x22, x21, [sp, #32] // 16-byte Folded Spill mov w21, w1 mov w1, #3265 - mov w2, #4096 + str x23, [sp, #16] // 8-byte Folded Spill + stp x20, x19, [sp, #48] // 16-byte Folded Spill mov x29, sp - bl kmem_cache_alloc_trace - cbz x0, .LBB139_5 + mov x20, x2 + bl kmem_cache_alloc + cbz x0, .LBB139_14 // %bb.1: mov w8, #30210 movk w8, #16388, lsl #16 mov x19, x0 cmp w21, w8 - b.eq .LBB139_6 + b.eq .LBB139_15 // %bb.2: mov w8, #30209 movk w8, #16388, lsl #16 cmp w21, w8 - b.ne .LBB139_9 + b.ne .LBB139_29 // %bb.3: - mov w2, #8 - mov x0, x19 - mov x1, x20 - bl _copy_from_user - cbz x0, .LBB139_10 + //APP + mrs x22, SP_EL0 + //NO_APP + ldrb w10, [x22, #46] + ldr x9, [x22, #8] + lsl x8, x20, #8 + tbnz w10, #5, .LBB139_5 // %bb.4: - adrp x0, .L.str.131 - add x0, x0, :lo12:.L.str.131 - mov w1, #256 - b .LBB139_8 + ldr x11, [x22] + mov x10, x20 + tbz w11, #26, .LBB139_6 .LBB139_5: - mov x21, #-1 - b .LBB139_22 + and x10, x20, x8, asr #8 .LBB139_6: + //APP + adds x10, x10, #8 // =8 + csel x9, xzr, x9, hi + csinv x10, x10, xzr, lo + sbcs xzr, x10, x9 + cset x10, ls + + //NO_APP + cbz x10, .LBB139_37 +// %bb.7: + //APP + + + //NO_APP + ldr x9, [x22, #8] + and x23, x20, x8, asr #8 + //APP + bics xzr, x23, x9 + csel x1, x20, xzr, eq + + //NO_APP + //APP + csdb + //NO_APP mov w2, #8 mov x0, x19 - mov x1, x20 - bl _copy_from_user - cbz x0, .LBB139_16 -// %bb.7: - adrp x0, .L.str.131 - add x0, x0, :lo12:.L.str.131 - mov w1, #276 -.LBB139_8: - mov x2, x20 - bl sftl_printk -.LBB139_9: - mov x21, #-14 - b .LBB139_21 -.LBB139_10: + bl __arch_copy_from_user + //APP + + + //NO_APP + cbnz x0, .LBB139_38 +// %bb.8: ldr w8, [x19] mov w9, #17745 movk w9, #22098, lsl #16 cmp w8, w9 - b.ne .LBB139_20 -// %bb.11: + b.ne .LBB139_28 +// %bb.9: adrp x8, g_vendor ldr x8, [x8, :lo12:g_vendor] - cbz x8, .LBB139_20 -// %bb.12: + cbz x8, .LBB139_28 +// %bb.10: ldrh w9, [x8, #10] - cbz x9, .LBB139_20 -// 
%bb.13: + cbz x9, .LBB139_28 +// %bb.11: ldrh w12, [x19, #4] ldrh w10, [x19, #6] add x0, x19, #8 // =8 add x11, x8, #20 // =20 - mov x21, #-1 -.LBB139_14: // =>This Inner Loop Header: Depth=1 +.LBB139_12: // =>This Inner Loop Header: Depth=1 ldurh w13, [x11, #-4] cmp w13, w12 - b.eq .LBB139_23 -// %bb.15: // in Loop: Header=BB139_14 Depth=1 - add x11, x11, #8 // =8 + b.eq .LBB139_32 +// %bb.13: // in Loop: Header=BB139_12 Depth=1 subs x9, x9, #1 // =1 - b.ne .LBB139_14 - b .LBB139_21 -.LBB139_16: + add x11, x11, #8 // =8 + b.ne .LBB139_12 + b .LBB139_28 +.LBB139_14: + mov x21, #-1 + b .LBB139_31 +.LBB139_15: + //APP + mrs x22, SP_EL0 + //NO_APP + ldrb w9, [x22, #46] + ldr x8, [x22, #8] + tbnz w9, #5, .LBB139_17 +// %bb.16: + ldr x10, [x22] + mov x9, x20 + tbz w10, #26, .LBB139_18 +.LBB139_17: + lsl x9, x20, #8 + and x9, x20, x9, asr #8 +.LBB139_18: + //APP + adds x9, x9, #8 // =8 + csel x8, xzr, x8, hi + csinv x9, x9, xzr, lo + sbcs xzr, x9, x8 + cset x9, ls + + //NO_APP + cbz x9, .LBB139_40 +// %bb.19: + lsl x9, x20, #8 + //APP + + + //NO_APP + ldr x8, [x22, #8] + and x23, x20, x9, asr #8 + //APP + bics xzr, x23, x8 + csel x1, x20, xzr, eq + + //NO_APP + //APP + csdb + //NO_APP + mov w2, #8 + mov x0, x19 + bl __arch_copy_from_user + //APP + + + //NO_APP + cbnz x0, .LBB139_41 +// %bb.20: ldr w8, [x19] mov w9, #17745 movk w9, #22098, lsl #16 cmp w8, w9 - b.ne .LBB139_20 -// %bb.17: - ldrh w8, [x19, #6] - cmp x8, #4087 // =4087 - b.hi .LBB139_20 -// %bb.18: - add x21, x8, #8 // =8 + b.ne .LBB139_28 +// %bb.21: + ldrh w9, [x19, #6] + cmp x9, #4087 // =4087 + b.hi .LBB139_28 +// %bb.22: + ldrb w10, [x22, #46] + ldr x8, [x22, #8] + add x21, x9, #8 // =8 + tbnz w10, #5, .LBB139_24 +// %bb.23: + ldr x10, [x22] + mov x9, x20 + tbz w10, #26, .LBB139_25 +.LBB139_24: + mov x9, x23 +.LBB139_25: + //APP + adds x9, x9, x21 + csel x8, xzr, x8, hi + csinv x9, x9, xzr, lo + sbcs xzr, x9, x8 + cset x9, ls + + //NO_APP + cbz x9, .LBB139_43 +// %bb.26: + //APP + + + //NO_APP + ldr x8, 
[x22, #8] + //APP + bics xzr, x23, x8 + csel x1, x20, xzr, eq + + //NO_APP + //APP + csdb + //NO_APP mov x0, x19 - mov x1, x21 - mov w2, wzr - bl __check_object_size - mov x0, x19 - mov x1, x20 mov x2, x21 - bl _copy_from_user - cbz x0, .LBB139_24 -// %bb.19: - adrp x0, .L.str.131 - add x0, x0, :lo12:.L.str.131 - mov w1, #283 - b .LBB139_8 -.LBB139_20: + bl __arch_copy_from_user + //APP + + + //NO_APP + cbnz x0, .LBB139_44 +// %bb.27: + ldrh w0, [x19, #4] + ldrh w2, [x19, #6] + add x1, x19, #8 // =8 + bl rk_sftl_vendor_write + // kill: def $w0 killed $w0 def $x0 + sxtw x21, w0 + b .LBB139_30 +.LBB139_28: mov x21, #-1 -.LBB139_21: + b .LBB139_30 +.LBB139_29: + mov x21, #-14 +.LBB139_30: mov x0, x19 bl kfree -.LBB139_22: +.LBB139_31: mov x0, x21 - ldp x20, x19, [sp, #32] // 16-byte Folded Reload - ldr x21, [sp, #16] // 8-byte Folded Reload - ldp x29, x30, [sp], #48 // 16-byte Folded Reload + ldp x20, x19, [sp, #48] // 16-byte Folded Reload + ldp x22, x21, [sp, #32] // 16-byte Folded Reload + ldr x23, [sp, #16] // 8-byte Folded Reload + ldp x29, x30, [sp], #64 // 16-byte Folded Reload hint #29 ret -.LBB139_23: +.LBB139_32: ldrh w9, [x11] ldurh w11, [x11, #-2] cmp w9, w10 @@ -20333,27 +20480,96 @@ rk_sftl_vendor_storage_ioctl: // @rk_sftl_vendor_storage_ioctl mov x2, x21 bl memcpy strh w21, [x19, #6] - add w21, w21, #8 // =8 - mov w2, #1 - mov x0, x19 - mov x1, x21 - bl __check_object_size - mov x0, x20 + ldrb w9, [x22, #46] + ldr x8, [x22, #8] + add w2, w21, #8 // =8 + tbnz w9, #5, .LBB139_34 +// %bb.33: + ldr x10, [x22] + mov x9, x20 + tbz w10, #26, .LBB139_35 +.LBB139_34: + mov x9, x23 +.LBB139_35: + mov x21, #-14 + //APP + adds x9, x9, x2 + csel x8, xzr, x8, hi + csinv x9, x9, xzr, lo + sbcs xzr, x9, x8 + cset x9, ls + + //NO_APP + cbz x9, .LBB139_30 +// %bb.36: + //APP + + + //NO_APP + ldr x8, [x22, #8] + //APP + bics xzr, x23, x8 + csel x0, x20, xzr, eq + + //NO_APP + //APP + csdb + //NO_APP mov x1, x19 - mov x2, x21 - bl _copy_to_user + bl __arch_copy_to_user 
cmp x0, #0 // =0 - mov x8, #-14 - csel x21, xzr, x8, eq - b .LBB139_21 -.LBB139_24: - ldrh w0, [x19, #4] - ldrh w2, [x19, #6] - add x1, x19, #8 // =8 - bl rk_sftl_vendor_write - // kill: def $w0 killed $w0 def $x0 - sxtw x21, w0 - b .LBB139_21 + //APP + + + //NO_APP + csel x21, xzr, x21, eq + b .LBB139_30 +.LBB139_37: + mov w2, #8 + b .LBB139_39 +.LBB139_38: + mov x2, x0 +.LBB139_39: + sub x8, x19, x2 + add x0, x8, #8 // =8 + mov w1, wzr + bl memset + adrp x0, .L.str.131 + add x0, x0, :lo12:.L.str.131 + mov w1, #256 + b .LBB139_46 +.LBB139_40: + mov w2, #8 + b .LBB139_42 +.LBB139_41: + mov x2, x0 +.LBB139_42: + sub x8, x19, x2 + add x0, x8, #8 // =8 + mov w1, wzr + bl memset + adrp x0, .L.str.131 + add x0, x0, :lo12:.L.str.131 + mov w1, #276 + b .LBB139_46 +.LBB139_43: + mov x2, x21 + b .LBB139_45 +.LBB139_44: + mov x2, x0 +.LBB139_45: + sub x8, x21, x2 + add x0, x19, x8 + mov w1, wzr + bl memset + adrp x0, .L.str.131 + add x0, x0, :lo12:.L.str.131 + mov w1, #283 +.LBB139_46: + mov x2, x20 + bl sftl_printk + mov x21, #-14 + b .LBB139_30 .Lfunc_end139: .size rk_sftl_vendor_storage_ioctl, .Lfunc_end139-rk_sftl_vendor_storage_ioctl // -- End function @@ -20374,1516 +20590,6 @@ rk_sftl_vendor_register: // @rk_sftl_vendor_register .Lfunc_end140: .size rk_sftl_vendor_register, .Lfunc_end140-rk_sftl_vendor_register // -- End function - .p2align 2 // -- Begin function _copy_from_user - .type _copy_from_user,@function -_copy_from_user: // @_copy_from_user -// %bb.0: - hint #25 - stp x29, x30, [sp, #-48]! 
// 16-byte Folded Spill - //APP - mrs x8, SP_EL0 - //NO_APP - ldrb w11, [x8, #54] - ldr x10, [x8, #8] - stp x20, x19, [sp, #32] // 16-byte Folded Spill - mov x19, x2 - mov x20, x0 - lsl x9, x1, #8 - str x21, [sp, #16] // 8-byte Folded Spill - mov x29, sp - tbnz w11, #5, .LBB141_2 -// %bb.1: - ldr x12, [x8] - mov x11, x1 - tbz w12, #26, .LBB141_3 -.LBB141_2: - and x11, x1, x9, asr #8 -.LBB141_3: - mov x21, x19 - //APP - adds x11, x11, x19 - csel x10, xzr, x10, hi - csinv x11, x11, xzr, lo - sbcs xzr, x11, x10 - cset x11, ls - - //NO_APP - cbz x11, .LBB141_18 -// %bb.4: - //APP -.Ltmp1: - b .Ltmp0 - .section __jump_table,"aw",@progbits - .p2align 3 -.Ltmp2: - .word .Ltmp1-.Ltmp2 -.Ltmp3: - .word .Ltmp0-.Ltmp3 -.Ltmp4: - .xword (arm64_const_caps_ready+1)-.Ltmp4 - .text - - - //NO_APP -// %bb.5: - //APP -.Ltmp6: - nop - .section __jump_table,"aw",@progbits - .p2align 3 -.Ltmp7: - .word .Ltmp6-.Ltmp7 -.Ltmp8: - .word .Ltmp5-.Ltmp8 -.Ltmp9: - .xword (cpu_hwcap_keys+64)-.Ltmp9 - .text - - - //NO_APP -.LBB141_6: - //APP -.Ltmp10: - mrs x10, DAIF -.Ltmp11: - .section .altinstructions,"a",@progbits -.Ltmp12: - .word .Ltmp10-.Ltmp12 -.Ltmp14: - .word .Ltmp13-.Ltmp14 - .hword 42 - .byte .Ltmp11-.Ltmp10 - .byte .Ltmp15-.Ltmp13 - .text - - .text 1 -.Ltmp13: -.set .L__reg_num_x0, 0 -.set .L__reg_num_x1, 1 -.set .L__reg_num_x2, 2 -.set .L__reg_num_x3, 3 -.set .L__reg_num_x4, 4 -.set .L__reg_num_x5, 5 -.set .L__reg_num_x6, 6 -.set .L__reg_num_x7, 7 -.set .L__reg_num_x8, 8 -.set .L__reg_num_x9, 9 -.set .L__reg_num_x10, 10 -.set .L__reg_num_x11, 11 -.set .L__reg_num_x12, 12 -.set .L__reg_num_x13, 13 -.set .L__reg_num_x14, 14 -.set .L__reg_num_x15, 15 -.set .L__reg_num_x16, 16 -.set .L__reg_num_x17, 17 -.set .L__reg_num_x18, 18 -.set .L__reg_num_x19, 19 -.set .L__reg_num_x20, 20 -.set .L__reg_num_x21, 21 -.set .L__reg_num_x22, 22 -.set .L__reg_num_x23, 23 -.set .L__reg_num_x24, 24 -.set .L__reg_num_x25, 25 -.set .L__reg_num_x26, 26 -.set .L__reg_num_x27, 27 -.set .L__reg_num_x28, 28 
-.set .L__reg_num_x29, 29 -.set .L__reg_num_x30, 30 - -.set .L__reg_num_xzr, 31 - - .inst 0xd538460a - - -.Ltmp15: -.Ltmp16: -.org (.Ltmp16-(.Ltmp15-.Ltmp13))+(.Ltmp11-.Ltmp10), 0 -.Ltmp17: -.org (.Ltmp17-(.Ltmp11-.Ltmp10))+(.Ltmp15-.Ltmp13), 0 - .text - - - //NO_APP - //APP -.Ltmp18: - and w11, w10, #0x80 -.Ltmp19: - .section .altinstructions,"a",@progbits -.Ltmp20: - .word .Ltmp18-.Ltmp20 -.Ltmp22: - .word .Ltmp21-.Ltmp22 - .hword 42 - .byte .Ltmp19-.Ltmp18 - .byte .Ltmp23-.Ltmp21 - .text - - .text 1 -.Ltmp21: - eor w11, w10, #0xe0 -.Ltmp23: -.Ltmp24: -.org (.Ltmp24-(.Ltmp23-.Ltmp21))+(.Ltmp19-.Ltmp18), 0 -.Ltmp25: -.org (.Ltmp25-(.Ltmp19-.Ltmp18))+(.Ltmp23-.Ltmp21), 0 - .text - - - //NO_APP - cbnz w11, .LBB141_10 -// %bb.7: - //APP -.Ltmp27: - nop - .section __jump_table,"aw",@progbits - .p2align 3 -.Ltmp28: - .word .Ltmp27-.Ltmp28 -.Ltmp29: - .word .Ltmp26-.Ltmp29 -.Ltmp30: - .xword gic_nonsecure_priorities-.Ltmp30 - .text - - - //NO_APP -// %bb.8: - mov w11, #96 -.LBB141_9: - //APP -.Ltmp31: - msr DAIFSet, #2 // arch_local_irq_disable -.Ltmp32: - .section .altinstructions,"a",@progbits -.Ltmp33: - .word .Ltmp31-.Ltmp33 -.Ltmp35: - .word .Ltmp34-.Ltmp35 - .hword 42 - .byte .Ltmp32-.Ltmp31 - .byte .Ltmp36-.Ltmp34 - .text - - .text 1 -.Ltmp34: -.set .L__reg_num_x0, 0 -.set .L__reg_num_x1, 1 -.set .L__reg_num_x2, 2 -.set .L__reg_num_x3, 3 -.set .L__reg_num_x4, 4 -.set .L__reg_num_x5, 5 -.set .L__reg_num_x6, 6 -.set .L__reg_num_x7, 7 -.set .L__reg_num_x8, 8 -.set .L__reg_num_x9, 9 -.set .L__reg_num_x10, 10 -.set .L__reg_num_x11, 11 -.set .L__reg_num_x12, 12 -.set .L__reg_num_x13, 13 -.set .L__reg_num_x14, 14 -.set .L__reg_num_x15, 15 -.set .L__reg_num_x16, 16 -.set .L__reg_num_x17, 17 -.set .L__reg_num_x18, 18 -.set .L__reg_num_x19, 19 -.set .L__reg_num_x20, 20 -.set .L__reg_num_x21, 21 -.set .L__reg_num_x22, 22 -.set .L__reg_num_x23, 23 -.set .L__reg_num_x24, 24 -.set .L__reg_num_x25, 25 -.set .L__reg_num_x26, 26 -.set .L__reg_num_x27, 27 -.set .L__reg_num_x28, 28 
-.set .L__reg_num_x29, 29 -.set .L__reg_num_x30, 30 - -.set .L__reg_num_xzr, 31 - - .inst 0xd518460b - - -.Ltmp36: -.Ltmp37: -.org (.Ltmp37-(.Ltmp36-.Ltmp34))+(.Ltmp32-.Ltmp31), 0 -.Ltmp38: -.org (.Ltmp38-(.Ltmp32-.Ltmp31))+(.Ltmp36-.Ltmp34), 0 - .text - - - //NO_APP -.LBB141_10: - ldr x11, [x8, #16] - //APP - mrs x12, TTBR1_EL1 - //NO_APP - mov x13, x11 - bfxil x13, x12, #0, #48 - //APP - msr TTBR1_EL1, x13 - //NO_APP - //APP - isb - //NO_APP - //APP - msr TTBR0_EL1, x11 - //NO_APP - //APP - isb - //NO_APP - //APP -.Ltmp39: - msr DAIF, x10 -.Ltmp40: - .section .altinstructions,"a",@progbits -.Ltmp41: - .word .Ltmp39-.Ltmp41 -.Ltmp43: - .word .Ltmp42-.Ltmp43 - .hword 42 - .byte .Ltmp40-.Ltmp39 - .byte .Ltmp44-.Ltmp42 - .text - - .text 1 -.Ltmp42: -.set .L__reg_num_x0, 0 -.set .L__reg_num_x1, 1 -.set .L__reg_num_x2, 2 -.set .L__reg_num_x3, 3 -.set .L__reg_num_x4, 4 -.set .L__reg_num_x5, 5 -.set .L__reg_num_x6, 6 -.set .L__reg_num_x7, 7 -.set .L__reg_num_x8, 8 -.set .L__reg_num_x9, 9 -.set .L__reg_num_x10, 10 -.set .L__reg_num_x11, 11 -.set .L__reg_num_x12, 12 -.set .L__reg_num_x13, 13 -.set .L__reg_num_x14, 14 -.set .L__reg_num_x15, 15 -.set .L__reg_num_x16, 16 -.set .L__reg_num_x17, 17 -.set .L__reg_num_x18, 18 -.set .L__reg_num_x19, 19 -.set .L__reg_num_x20, 20 -.set .L__reg_num_x21, 21 -.set .L__reg_num_x22, 22 -.set .L__reg_num_x23, 23 -.set .L__reg_num_x24, 24 -.set .L__reg_num_x25, 25 -.set .L__reg_num_x26, 26 -.set .L__reg_num_x27, 27 -.set .L__reg_num_x28, 28 -.set .L__reg_num_x29, 29 -.set .L__reg_num_x30, 30 - -.set .L__reg_num_xzr, 31 - - .inst 0xd518460a - - -.Ltmp44: -.Ltmp45: -.org (.Ltmp45-(.Ltmp44-.Ltmp42))+(.Ltmp40-.Ltmp39), 0 -.Ltmp46: -.org (.Ltmp46-(.Ltmp40-.Ltmp39))+(.Ltmp44-.Ltmp42), 0 - .text - - - //NO_APP - //APP -.Ltmp48: - nop - .section __jump_table,"aw",@progbits - .p2align 3 -.Ltmp49: - .word .Ltmp48-.Ltmp49 -.Ltmp50: - .word .Ltmp47-.Ltmp50 -.Ltmp51: - .xword gic_pmr_sync-.Ltmp51 - .text - - - //NO_APP -.LBB141_11: - ldr x10, [x8, #8] 
- and x9, x1, x9, asr #8 - //APP - bics xzr, x9, x10 - csel x8, x1, xzr, eq - - //NO_APP - //APP - csdb - //NO_APP - mov x0, x20 - mov x1, x8 - mov x2, x19 - bl __arch_copy_from_user - mov x21, x0 - //APP -.Ltmp53: - b .Ltmp52 - .section __jump_table,"aw",@progbits - .p2align 3 -.Ltmp54: - .word .Ltmp53-.Ltmp54 -.Ltmp55: - .word .Ltmp52-.Ltmp55 -.Ltmp56: - .xword (arm64_const_caps_ready+1)-.Ltmp56 - .text - - - //NO_APP -// %bb.12: - //APP -.Ltmp58: - nop - .section __jump_table,"aw",@progbits - .p2align 3 -.Ltmp59: - .word .Ltmp58-.Ltmp59 -.Ltmp60: - .word .Ltmp57-.Ltmp60 -.Ltmp61: - .xword (cpu_hwcap_keys+64)-.Ltmp61 - .text - - - //NO_APP -.LBB141_13: - //APP -.Ltmp62: - mrs x8, DAIF -.Ltmp63: - .section .altinstructions,"a",@progbits -.Ltmp64: - .word .Ltmp62-.Ltmp64 -.Ltmp66: - .word .Ltmp65-.Ltmp66 - .hword 42 - .byte .Ltmp63-.Ltmp62 - .byte .Ltmp67-.Ltmp65 - .text - - .text 1 -.Ltmp65: -.set .L__reg_num_x0, 0 -.set .L__reg_num_x1, 1 -.set .L__reg_num_x2, 2 -.set .L__reg_num_x3, 3 -.set .L__reg_num_x4, 4 -.set .L__reg_num_x5, 5 -.set .L__reg_num_x6, 6 -.set .L__reg_num_x7, 7 -.set .L__reg_num_x8, 8 -.set .L__reg_num_x9, 9 -.set .L__reg_num_x10, 10 -.set .L__reg_num_x11, 11 -.set .L__reg_num_x12, 12 -.set .L__reg_num_x13, 13 -.set .L__reg_num_x14, 14 -.set .L__reg_num_x15, 15 -.set .L__reg_num_x16, 16 -.set .L__reg_num_x17, 17 -.set .L__reg_num_x18, 18 -.set .L__reg_num_x19, 19 -.set .L__reg_num_x20, 20 -.set .L__reg_num_x21, 21 -.set .L__reg_num_x22, 22 -.set .L__reg_num_x23, 23 -.set .L__reg_num_x24, 24 -.set .L__reg_num_x25, 25 -.set .L__reg_num_x26, 26 -.set .L__reg_num_x27, 27 -.set .L__reg_num_x28, 28 -.set .L__reg_num_x29, 29 -.set .L__reg_num_x30, 30 - -.set .L__reg_num_xzr, 31 - - .inst 0xd5384608 - - -.Ltmp67: -.Ltmp68: -.org (.Ltmp68-(.Ltmp67-.Ltmp65))+(.Ltmp63-.Ltmp62), 0 -.Ltmp69: -.org (.Ltmp69-(.Ltmp63-.Ltmp62))+(.Ltmp67-.Ltmp65), 0 - .text - - - //NO_APP - //APP -.Ltmp70: - and w9, w8, #0x80 -.Ltmp71: - .section .altinstructions,"a",@progbits 
-.Ltmp72: - .word .Ltmp70-.Ltmp72 -.Ltmp74: - .word .Ltmp73-.Ltmp74 - .hword 42 - .byte .Ltmp71-.Ltmp70 - .byte .Ltmp75-.Ltmp73 - .text - - .text 1 -.Ltmp73: - eor w9, w8, #0xe0 -.Ltmp75: -.Ltmp76: -.org (.Ltmp76-(.Ltmp75-.Ltmp73))+(.Ltmp71-.Ltmp70), 0 -.Ltmp77: -.org (.Ltmp77-(.Ltmp71-.Ltmp70))+(.Ltmp75-.Ltmp73), 0 - .text - - - //NO_APP - cbnz w9, .LBB141_17 -// %bb.14: - //APP -.Ltmp79: - nop - .section __jump_table,"aw",@progbits - .p2align 3 -.Ltmp80: - .word .Ltmp79-.Ltmp80 -.Ltmp81: - .word .Ltmp78-.Ltmp81 -.Ltmp82: - .xword gic_nonsecure_priorities-.Ltmp82 - .text - - - //NO_APP -// %bb.15: - mov w9, #96 -.LBB141_16: - //APP -.Ltmp83: - msr DAIFSet, #2 // arch_local_irq_disable -.Ltmp84: - .section .altinstructions,"a",@progbits -.Ltmp85: - .word .Ltmp83-.Ltmp85 -.Ltmp87: - .word .Ltmp86-.Ltmp87 - .hword 42 - .byte .Ltmp84-.Ltmp83 - .byte .Ltmp88-.Ltmp86 - .text - - .text 1 -.Ltmp86: -.set .L__reg_num_x0, 0 -.set .L__reg_num_x1, 1 -.set .L__reg_num_x2, 2 -.set .L__reg_num_x3, 3 -.set .L__reg_num_x4, 4 -.set .L__reg_num_x5, 5 -.set .L__reg_num_x6, 6 -.set .L__reg_num_x7, 7 -.set .L__reg_num_x8, 8 -.set .L__reg_num_x9, 9 -.set .L__reg_num_x10, 10 -.set .L__reg_num_x11, 11 -.set .L__reg_num_x12, 12 -.set .L__reg_num_x13, 13 -.set .L__reg_num_x14, 14 -.set .L__reg_num_x15, 15 -.set .L__reg_num_x16, 16 -.set .L__reg_num_x17, 17 -.set .L__reg_num_x18, 18 -.set .L__reg_num_x19, 19 -.set .L__reg_num_x20, 20 -.set .L__reg_num_x21, 21 -.set .L__reg_num_x22, 22 -.set .L__reg_num_x23, 23 -.set .L__reg_num_x24, 24 -.set .L__reg_num_x25, 25 -.set .L__reg_num_x26, 26 -.set .L__reg_num_x27, 27 -.set .L__reg_num_x28, 28 -.set .L__reg_num_x29, 29 -.set .L__reg_num_x30, 30 - -.set .L__reg_num_xzr, 31 - - .inst 0xd5184609 - - -.Ltmp88: -.Ltmp89: -.org (.Ltmp89-(.Ltmp88-.Ltmp86))+(.Ltmp84-.Ltmp83), 0 -.Ltmp90: -.org (.Ltmp90-(.Ltmp84-.Ltmp83))+(.Ltmp88-.Ltmp86), 0 - .text - - - //NO_APP -.LBB141_17: - //APP - mrs x9, TTBR1_EL1 - //NO_APP - and x9, x9, #0xffffffffffff - sub x10, 
x9, #1, lsl #12 // =4096 - //APP - msr TTBR0_EL1, x10 - //NO_APP - //APP - isb - //NO_APP - //APP - msr TTBR1_EL1, x9 - //NO_APP - //APP - isb - //NO_APP - //APP -.Ltmp91: - msr DAIF, x8 -.Ltmp92: - .section .altinstructions,"a",@progbits -.Ltmp93: - .word .Ltmp91-.Ltmp93 -.Ltmp95: - .word .Ltmp94-.Ltmp95 - .hword 42 - .byte .Ltmp92-.Ltmp91 - .byte .Ltmp96-.Ltmp94 - .text - - .text 1 -.Ltmp94: -.set .L__reg_num_x0, 0 -.set .L__reg_num_x1, 1 -.set .L__reg_num_x2, 2 -.set .L__reg_num_x3, 3 -.set .L__reg_num_x4, 4 -.set .L__reg_num_x5, 5 -.set .L__reg_num_x6, 6 -.set .L__reg_num_x7, 7 -.set .L__reg_num_x8, 8 -.set .L__reg_num_x9, 9 -.set .L__reg_num_x10, 10 -.set .L__reg_num_x11, 11 -.set .L__reg_num_x12, 12 -.set .L__reg_num_x13, 13 -.set .L__reg_num_x14, 14 -.set .L__reg_num_x15, 15 -.set .L__reg_num_x16, 16 -.set .L__reg_num_x17, 17 -.set .L__reg_num_x18, 18 -.set .L__reg_num_x19, 19 -.set .L__reg_num_x20, 20 -.set .L__reg_num_x21, 21 -.set .L__reg_num_x22, 22 -.set .L__reg_num_x23, 23 -.set .L__reg_num_x24, 24 -.set .L__reg_num_x25, 25 -.set .L__reg_num_x26, 26 -.set .L__reg_num_x27, 27 -.set .L__reg_num_x28, 28 -.set .L__reg_num_x29, 29 -.set .L__reg_num_x30, 30 - -.set .L__reg_num_xzr, 31 - - .inst 0xd5184608 - - -.Ltmp96: -.Ltmp97: -.org (.Ltmp97-(.Ltmp96-.Ltmp94))+(.Ltmp92-.Ltmp91), 0 -.Ltmp98: -.org (.Ltmp98-(.Ltmp92-.Ltmp91))+(.Ltmp96-.Ltmp94), 0 - .text - - - //NO_APP - //APP -.Ltmp100: - nop - .section __jump_table,"aw",@progbits - .p2align 3 -.Ltmp101: - .word .Ltmp100-.Ltmp101 -.Ltmp102: - .word .Ltmp99-.Ltmp102 -.Ltmp103: - .xword gic_pmr_sync-.Ltmp103 - .text - - - //NO_APP -.LBB141_18: - cbnz x21, .LBB141_20 -.LBB141_19: - mov x0, x21 - ldp x20, x19, [sp, #32] // 16-byte Folded Reload - ldr x21, [sp, #16] // 8-byte Folded Reload - ldp x29, x30, [sp], #48 // 16-byte Folded Reload - hint #29 - ret -.LBB141_20: - sub x8, x19, x21 - add x0, x20, x8 - mov w1, wzr - mov x2, x21 - bl memset - b .LBB141_19 -.Ltmp0: // Block address taken -.LBB141_21: - hint 
#36 - adrp x10, cpu_hwcaps - ldr x10, [x10, :lo12:cpu_hwcaps] - tbz w10, #4, .LBB141_6 -.Ltmp5: // Block address taken -.LBB141_22: - hint #36 - //APP -.Ltmp104: - nop -.Ltmp105: - .section .altinstructions,"a",@progbits -.Ltmp106: - .word .Ltmp104-.Ltmp106 -.Ltmp108: - .word .Ltmp107-.Ltmp108 - .hword 10 - .byte .Ltmp105-.Ltmp104 - .byte .Ltmp109-.Ltmp107 - .text - - .text 1 -.Ltmp107: - .inst 0xd500409f - -.Ltmp109: -.Ltmp110: -.org (.Ltmp110-(.Ltmp109-.Ltmp107))+(.Ltmp105-.Ltmp104), 0 -.Ltmp111: -.org (.Ltmp111-(.Ltmp105-.Ltmp104))+(.Ltmp109-.Ltmp107), 0 - .text - - - //NO_APP - b .LBB141_11 -.Ltmp47: // Block address taken -.LBB141_23: - hint #36 - //APP - dsb sy - //NO_APP - b .LBB141_11 -.Ltmp52: // Block address taken -.LBB141_24: - hint #36 - adrp x8, cpu_hwcaps - ldr x8, [x8, :lo12:cpu_hwcaps] - tbz w8, #4, .LBB141_13 -.Ltmp57: // Block address taken -.LBB141_25: - hint #36 - //APP -.Ltmp112: - nop -.Ltmp113: - .section .altinstructions,"a",@progbits -.Ltmp114: - .word .Ltmp112-.Ltmp114 -.Ltmp116: - .word .Ltmp115-.Ltmp116 - .hword 10 - .byte .Ltmp113-.Ltmp112 - .byte .Ltmp117-.Ltmp115 - .text - - .text 1 -.Ltmp115: - .inst 0xd500419f - -.Ltmp117: -.Ltmp118: -.org (.Ltmp118-(.Ltmp117-.Ltmp115))+(.Ltmp113-.Ltmp112), 0 -.Ltmp119: -.org (.Ltmp119-(.Ltmp113-.Ltmp112))+(.Ltmp117-.Ltmp115), 0 - .text - - - //NO_APP - b .LBB141_18 -.Ltmp99: // Block address taken -.LBB141_26: - hint #36 - //APP - dsb sy - //NO_APP - b .LBB141_18 -.Ltmp26: // Block address taken -.LBB141_27: - hint #36 - mov w11, #160 - b .LBB141_9 -.Ltmp78: // Block address taken -.LBB141_28: - hint #36 - mov w9, #160 - b .LBB141_16 -.Lfunc_end141: - .size _copy_from_user, .Lfunc_end141-_copy_from_user - // -- End function - .p2align 2 // -- Begin function _copy_to_user - .type _copy_to_user,@function -_copy_to_user: // @_copy_to_user -// %bb.0: - hint #25 - stp x29, x30, [sp, #-16]! 
// 16-byte Folded Spill - //APP - mrs x8, SP_EL0 - //NO_APP - ldrb w11, [x8, #54] - ldr x10, [x8, #8] - lsl x9, x0, #8 - mov x29, sp - tbnz w11, #5, .LBB142_2 -// %bb.1: - ldr x12, [x8] - mov x11, x0 - tbz w12, #26, .LBB142_3 -.LBB142_2: - and x11, x0, x9, asr #8 -.LBB142_3: - //APP - adds x11, x11, x2 - csel x10, xzr, x10, hi - csinv x11, x11, xzr, lo - sbcs xzr, x11, x10 - cset x11, ls - - //NO_APP - cbz x11, .LBB142_18 -// %bb.4: - //APP -.Ltmp121: - b .Ltmp120 - .section __jump_table,"aw",@progbits - .p2align 3 -.Ltmp122: - .word .Ltmp121-.Ltmp122 -.Ltmp123: - .word .Ltmp120-.Ltmp123 -.Ltmp124: - .xword (arm64_const_caps_ready+1)-.Ltmp124 - .text - - - //NO_APP -// %bb.5: - //APP -.Ltmp126: - nop - .section __jump_table,"aw",@progbits - .p2align 3 -.Ltmp127: - .word .Ltmp126-.Ltmp127 -.Ltmp128: - .word .Ltmp125-.Ltmp128 -.Ltmp129: - .xword (cpu_hwcap_keys+64)-.Ltmp129 - .text - - - //NO_APP -.LBB142_6: - //APP -.Ltmp130: - mrs x10, DAIF -.Ltmp131: - .section .altinstructions,"a",@progbits -.Ltmp132: - .word .Ltmp130-.Ltmp132 -.Ltmp134: - .word .Ltmp133-.Ltmp134 - .hword 42 - .byte .Ltmp131-.Ltmp130 - .byte .Ltmp135-.Ltmp133 - .text - - .text 1 -.Ltmp133: -.set .L__reg_num_x0, 0 -.set .L__reg_num_x1, 1 -.set .L__reg_num_x2, 2 -.set .L__reg_num_x3, 3 -.set .L__reg_num_x4, 4 -.set .L__reg_num_x5, 5 -.set .L__reg_num_x6, 6 -.set .L__reg_num_x7, 7 -.set .L__reg_num_x8, 8 -.set .L__reg_num_x9, 9 -.set .L__reg_num_x10, 10 -.set .L__reg_num_x11, 11 -.set .L__reg_num_x12, 12 -.set .L__reg_num_x13, 13 -.set .L__reg_num_x14, 14 -.set .L__reg_num_x15, 15 -.set .L__reg_num_x16, 16 -.set .L__reg_num_x17, 17 -.set .L__reg_num_x18, 18 -.set .L__reg_num_x19, 19 -.set .L__reg_num_x20, 20 -.set .L__reg_num_x21, 21 -.set .L__reg_num_x22, 22 -.set .L__reg_num_x23, 23 -.set .L__reg_num_x24, 24 -.set .L__reg_num_x25, 25 -.set .L__reg_num_x26, 26 -.set .L__reg_num_x27, 27 -.set .L__reg_num_x28, 28 -.set .L__reg_num_x29, 29 -.set .L__reg_num_x30, 30 - -.set .L__reg_num_xzr, 31 - - 
.inst 0xd538460a - - -.Ltmp135: -.Ltmp136: -.org (.Ltmp136-(.Ltmp135-.Ltmp133))+(.Ltmp131-.Ltmp130), 0 -.Ltmp137: -.org (.Ltmp137-(.Ltmp131-.Ltmp130))+(.Ltmp135-.Ltmp133), 0 - .text - - - //NO_APP - //APP -.Ltmp138: - and w11, w10, #0x80 -.Ltmp139: - .section .altinstructions,"a",@progbits -.Ltmp140: - .word .Ltmp138-.Ltmp140 -.Ltmp142: - .word .Ltmp141-.Ltmp142 - .hword 42 - .byte .Ltmp139-.Ltmp138 - .byte .Ltmp143-.Ltmp141 - .text - - .text 1 -.Ltmp141: - eor w11, w10, #0xe0 -.Ltmp143: -.Ltmp144: -.org (.Ltmp144-(.Ltmp143-.Ltmp141))+(.Ltmp139-.Ltmp138), 0 -.Ltmp145: -.org (.Ltmp145-(.Ltmp139-.Ltmp138))+(.Ltmp143-.Ltmp141), 0 - .text - - - //NO_APP - cbnz w11, .LBB142_10 -// %bb.7: - //APP -.Ltmp147: - nop - .section __jump_table,"aw",@progbits - .p2align 3 -.Ltmp148: - .word .Ltmp147-.Ltmp148 -.Ltmp149: - .word .Ltmp146-.Ltmp149 -.Ltmp150: - .xword gic_nonsecure_priorities-.Ltmp150 - .text - - - //NO_APP -// %bb.8: - mov w11, #96 -.LBB142_9: - //APP -.Ltmp151: - msr DAIFSet, #2 // arch_local_irq_disable -.Ltmp152: - .section .altinstructions,"a",@progbits -.Ltmp153: - .word .Ltmp151-.Ltmp153 -.Ltmp155: - .word .Ltmp154-.Ltmp155 - .hword 42 - .byte .Ltmp152-.Ltmp151 - .byte .Ltmp156-.Ltmp154 - .text - - .text 1 -.Ltmp154: -.set .L__reg_num_x0, 0 -.set .L__reg_num_x1, 1 -.set .L__reg_num_x2, 2 -.set .L__reg_num_x3, 3 -.set .L__reg_num_x4, 4 -.set .L__reg_num_x5, 5 -.set .L__reg_num_x6, 6 -.set .L__reg_num_x7, 7 -.set .L__reg_num_x8, 8 -.set .L__reg_num_x9, 9 -.set .L__reg_num_x10, 10 -.set .L__reg_num_x11, 11 -.set .L__reg_num_x12, 12 -.set .L__reg_num_x13, 13 -.set .L__reg_num_x14, 14 -.set .L__reg_num_x15, 15 -.set .L__reg_num_x16, 16 -.set .L__reg_num_x17, 17 -.set .L__reg_num_x18, 18 -.set .L__reg_num_x19, 19 -.set .L__reg_num_x20, 20 -.set .L__reg_num_x21, 21 -.set .L__reg_num_x22, 22 -.set .L__reg_num_x23, 23 -.set .L__reg_num_x24, 24 -.set .L__reg_num_x25, 25 -.set .L__reg_num_x26, 26 -.set .L__reg_num_x27, 27 -.set .L__reg_num_x28, 28 -.set .L__reg_num_x29, 
29 -.set .L__reg_num_x30, 30 - -.set .L__reg_num_xzr, 31 - - .inst 0xd518460b - - -.Ltmp156: -.Ltmp157: -.org (.Ltmp157-(.Ltmp156-.Ltmp154))+(.Ltmp152-.Ltmp151), 0 -.Ltmp158: -.org (.Ltmp158-(.Ltmp152-.Ltmp151))+(.Ltmp156-.Ltmp154), 0 - .text - - - //NO_APP -.LBB142_10: - ldr x11, [x8, #16] - //APP - mrs x12, TTBR1_EL1 - //NO_APP - mov x13, x11 - bfxil x13, x12, #0, #48 - //APP - msr TTBR1_EL1, x13 - //NO_APP - //APP - isb - //NO_APP - //APP - msr TTBR0_EL1, x11 - //NO_APP - //APP - isb - //NO_APP - //APP -.Ltmp159: - msr DAIF, x10 -.Ltmp160: - .section .altinstructions,"a",@progbits -.Ltmp161: - .word .Ltmp159-.Ltmp161 -.Ltmp163: - .word .Ltmp162-.Ltmp163 - .hword 42 - .byte .Ltmp160-.Ltmp159 - .byte .Ltmp164-.Ltmp162 - .text - - .text 1 -.Ltmp162: -.set .L__reg_num_x0, 0 -.set .L__reg_num_x1, 1 -.set .L__reg_num_x2, 2 -.set .L__reg_num_x3, 3 -.set .L__reg_num_x4, 4 -.set .L__reg_num_x5, 5 -.set .L__reg_num_x6, 6 -.set .L__reg_num_x7, 7 -.set .L__reg_num_x8, 8 -.set .L__reg_num_x9, 9 -.set .L__reg_num_x10, 10 -.set .L__reg_num_x11, 11 -.set .L__reg_num_x12, 12 -.set .L__reg_num_x13, 13 -.set .L__reg_num_x14, 14 -.set .L__reg_num_x15, 15 -.set .L__reg_num_x16, 16 -.set .L__reg_num_x17, 17 -.set .L__reg_num_x18, 18 -.set .L__reg_num_x19, 19 -.set .L__reg_num_x20, 20 -.set .L__reg_num_x21, 21 -.set .L__reg_num_x22, 22 -.set .L__reg_num_x23, 23 -.set .L__reg_num_x24, 24 -.set .L__reg_num_x25, 25 -.set .L__reg_num_x26, 26 -.set .L__reg_num_x27, 27 -.set .L__reg_num_x28, 28 -.set .L__reg_num_x29, 29 -.set .L__reg_num_x30, 30 - -.set .L__reg_num_xzr, 31 - - .inst 0xd518460a - - -.Ltmp164: -.Ltmp165: -.org (.Ltmp165-(.Ltmp164-.Ltmp162))+(.Ltmp160-.Ltmp159), 0 -.Ltmp166: -.org (.Ltmp166-(.Ltmp160-.Ltmp159))+(.Ltmp164-.Ltmp162), 0 - .text - - - //NO_APP - //APP -.Ltmp168: - nop - .section __jump_table,"aw",@progbits - .p2align 3 -.Ltmp169: - .word .Ltmp168-.Ltmp169 -.Ltmp170: - .word .Ltmp167-.Ltmp170 -.Ltmp171: - .xword gic_pmr_sync-.Ltmp171 - .text - - - //NO_APP 
-.LBB142_11: - ldr x10, [x8, #8] - and x9, x0, x9, asr #8 - //APP - bics xzr, x9, x10 - csel x8, x0, xzr, eq - - //NO_APP - //APP - csdb - //NO_APP - mov x0, x8 - bl __arch_copy_to_user - mov x2, x0 - //APP -.Ltmp173: - b .Ltmp172 - .section __jump_table,"aw",@progbits - .p2align 3 -.Ltmp174: - .word .Ltmp173-.Ltmp174 -.Ltmp175: - .word .Ltmp172-.Ltmp175 -.Ltmp176: - .xword (arm64_const_caps_ready+1)-.Ltmp176 - .text - - - //NO_APP -// %bb.12: - //APP -.Ltmp178: - nop - .section __jump_table,"aw",@progbits - .p2align 3 -.Ltmp179: - .word .Ltmp178-.Ltmp179 -.Ltmp180: - .word .Ltmp177-.Ltmp180 -.Ltmp181: - .xword (cpu_hwcap_keys+64)-.Ltmp181 - .text - - - //NO_APP -.LBB142_13: - //APP -.Ltmp182: - mrs x8, DAIF -.Ltmp183: - .section .altinstructions,"a",@progbits -.Ltmp184: - .word .Ltmp182-.Ltmp184 -.Ltmp186: - .word .Ltmp185-.Ltmp186 - .hword 42 - .byte .Ltmp183-.Ltmp182 - .byte .Ltmp187-.Ltmp185 - .text - - .text 1 -.Ltmp185: -.set .L__reg_num_x0, 0 -.set .L__reg_num_x1, 1 -.set .L__reg_num_x2, 2 -.set .L__reg_num_x3, 3 -.set .L__reg_num_x4, 4 -.set .L__reg_num_x5, 5 -.set .L__reg_num_x6, 6 -.set .L__reg_num_x7, 7 -.set .L__reg_num_x8, 8 -.set .L__reg_num_x9, 9 -.set .L__reg_num_x10, 10 -.set .L__reg_num_x11, 11 -.set .L__reg_num_x12, 12 -.set .L__reg_num_x13, 13 -.set .L__reg_num_x14, 14 -.set .L__reg_num_x15, 15 -.set .L__reg_num_x16, 16 -.set .L__reg_num_x17, 17 -.set .L__reg_num_x18, 18 -.set .L__reg_num_x19, 19 -.set .L__reg_num_x20, 20 -.set .L__reg_num_x21, 21 -.set .L__reg_num_x22, 22 -.set .L__reg_num_x23, 23 -.set .L__reg_num_x24, 24 -.set .L__reg_num_x25, 25 -.set .L__reg_num_x26, 26 -.set .L__reg_num_x27, 27 -.set .L__reg_num_x28, 28 -.set .L__reg_num_x29, 29 -.set .L__reg_num_x30, 30 - -.set .L__reg_num_xzr, 31 - - .inst 0xd5384608 - - -.Ltmp187: -.Ltmp188: -.org (.Ltmp188-(.Ltmp187-.Ltmp185))+(.Ltmp183-.Ltmp182), 0 -.Ltmp189: -.org (.Ltmp189-(.Ltmp183-.Ltmp182))+(.Ltmp187-.Ltmp185), 0 - .text - - - //NO_APP - //APP -.Ltmp190: - and w9, w8, #0x80 
-.Ltmp191: - .section .altinstructions,"a",@progbits -.Ltmp192: - .word .Ltmp190-.Ltmp192 -.Ltmp194: - .word .Ltmp193-.Ltmp194 - .hword 42 - .byte .Ltmp191-.Ltmp190 - .byte .Ltmp195-.Ltmp193 - .text - - .text 1 -.Ltmp193: - eor w9, w8, #0xe0 -.Ltmp195: -.Ltmp196: -.org (.Ltmp196-(.Ltmp195-.Ltmp193))+(.Ltmp191-.Ltmp190), 0 -.Ltmp197: -.org (.Ltmp197-(.Ltmp191-.Ltmp190))+(.Ltmp195-.Ltmp193), 0 - .text - - - //NO_APP - cbnz w9, .LBB142_17 -// %bb.14: - //APP -.Ltmp199: - nop - .section __jump_table,"aw",@progbits - .p2align 3 -.Ltmp200: - .word .Ltmp199-.Ltmp200 -.Ltmp201: - .word .Ltmp198-.Ltmp201 -.Ltmp202: - .xword gic_nonsecure_priorities-.Ltmp202 - .text - - - //NO_APP -// %bb.15: - mov w9, #96 -.LBB142_16: - //APP -.Ltmp203: - msr DAIFSet, #2 // arch_local_irq_disable -.Ltmp204: - .section .altinstructions,"a",@progbits -.Ltmp205: - .word .Ltmp203-.Ltmp205 -.Ltmp207: - .word .Ltmp206-.Ltmp207 - .hword 42 - .byte .Ltmp204-.Ltmp203 - .byte .Ltmp208-.Ltmp206 - .text - - .text 1 -.Ltmp206: -.set .L__reg_num_x0, 0 -.set .L__reg_num_x1, 1 -.set .L__reg_num_x2, 2 -.set .L__reg_num_x3, 3 -.set .L__reg_num_x4, 4 -.set .L__reg_num_x5, 5 -.set .L__reg_num_x6, 6 -.set .L__reg_num_x7, 7 -.set .L__reg_num_x8, 8 -.set .L__reg_num_x9, 9 -.set .L__reg_num_x10, 10 -.set .L__reg_num_x11, 11 -.set .L__reg_num_x12, 12 -.set .L__reg_num_x13, 13 -.set .L__reg_num_x14, 14 -.set .L__reg_num_x15, 15 -.set .L__reg_num_x16, 16 -.set .L__reg_num_x17, 17 -.set .L__reg_num_x18, 18 -.set .L__reg_num_x19, 19 -.set .L__reg_num_x20, 20 -.set .L__reg_num_x21, 21 -.set .L__reg_num_x22, 22 -.set .L__reg_num_x23, 23 -.set .L__reg_num_x24, 24 -.set .L__reg_num_x25, 25 -.set .L__reg_num_x26, 26 -.set .L__reg_num_x27, 27 -.set .L__reg_num_x28, 28 -.set .L__reg_num_x29, 29 -.set .L__reg_num_x30, 30 - -.set .L__reg_num_xzr, 31 - - .inst 0xd5184609 - - -.Ltmp208: -.Ltmp209: -.org (.Ltmp209-(.Ltmp208-.Ltmp206))+(.Ltmp204-.Ltmp203), 0 -.Ltmp210: -.org (.Ltmp210-(.Ltmp204-.Ltmp203))+(.Ltmp208-.Ltmp206), 0 - 
.text - - - //NO_APP -.LBB142_17: - //APP - mrs x9, TTBR1_EL1 - //NO_APP - and x9, x9, #0xffffffffffff - sub x10, x9, #1, lsl #12 // =4096 - //APP - msr TTBR0_EL1, x10 - //NO_APP - //APP - isb - //NO_APP - //APP - msr TTBR1_EL1, x9 - //NO_APP - //APP - isb - //NO_APP - //APP -.Ltmp211: - msr DAIF, x8 -.Ltmp212: - .section .altinstructions,"a",@progbits -.Ltmp213: - .word .Ltmp211-.Ltmp213 -.Ltmp215: - .word .Ltmp214-.Ltmp215 - .hword 42 - .byte .Ltmp212-.Ltmp211 - .byte .Ltmp216-.Ltmp214 - .text - - .text 1 -.Ltmp214: -.set .L__reg_num_x0, 0 -.set .L__reg_num_x1, 1 -.set .L__reg_num_x2, 2 -.set .L__reg_num_x3, 3 -.set .L__reg_num_x4, 4 -.set .L__reg_num_x5, 5 -.set .L__reg_num_x6, 6 -.set .L__reg_num_x7, 7 -.set .L__reg_num_x8, 8 -.set .L__reg_num_x9, 9 -.set .L__reg_num_x10, 10 -.set .L__reg_num_x11, 11 -.set .L__reg_num_x12, 12 -.set .L__reg_num_x13, 13 -.set .L__reg_num_x14, 14 -.set .L__reg_num_x15, 15 -.set .L__reg_num_x16, 16 -.set .L__reg_num_x17, 17 -.set .L__reg_num_x18, 18 -.set .L__reg_num_x19, 19 -.set .L__reg_num_x20, 20 -.set .L__reg_num_x21, 21 -.set .L__reg_num_x22, 22 -.set .L__reg_num_x23, 23 -.set .L__reg_num_x24, 24 -.set .L__reg_num_x25, 25 -.set .L__reg_num_x26, 26 -.set .L__reg_num_x27, 27 -.set .L__reg_num_x28, 28 -.set .L__reg_num_x29, 29 -.set .L__reg_num_x30, 30 - -.set .L__reg_num_xzr, 31 - - .inst 0xd5184608 - - -.Ltmp216: -.Ltmp217: -.org (.Ltmp217-(.Ltmp216-.Ltmp214))+(.Ltmp212-.Ltmp211), 0 -.Ltmp218: -.org (.Ltmp218-(.Ltmp212-.Ltmp211))+(.Ltmp216-.Ltmp214), 0 - .text - - - //NO_APP - //APP -.Ltmp220: - nop - .section __jump_table,"aw",@progbits - .p2align 3 -.Ltmp221: - .word .Ltmp220-.Ltmp221 -.Ltmp222: - .word .Ltmp219-.Ltmp222 -.Ltmp223: - .xword gic_pmr_sync-.Ltmp223 - .text - - - //NO_APP -.LBB142_18: - mov x0, x2 - ldp x29, x30, [sp], #16 // 16-byte Folded Reload - hint #29 - ret -.Ltmp120: // Block address taken -.LBB142_19: - hint #36 - adrp x10, cpu_hwcaps - ldr x10, [x10, :lo12:cpu_hwcaps] - tbz w10, #4, .LBB142_6 
-.Ltmp125: // Block address taken -.LBB142_20: - hint #36 - //APP -.Ltmp224: - nop -.Ltmp225: - .section .altinstructions,"a",@progbits -.Ltmp226: - .word .Ltmp224-.Ltmp226 -.Ltmp228: - .word .Ltmp227-.Ltmp228 - .hword 10 - .byte .Ltmp225-.Ltmp224 - .byte .Ltmp229-.Ltmp227 - .text - - .text 1 -.Ltmp227: - .inst 0xd500409f - -.Ltmp229: -.Ltmp230: -.org (.Ltmp230-(.Ltmp229-.Ltmp227))+(.Ltmp225-.Ltmp224), 0 -.Ltmp231: -.org (.Ltmp231-(.Ltmp225-.Ltmp224))+(.Ltmp229-.Ltmp227), 0 - .text - - - //NO_APP - b .LBB142_11 -.Ltmp167: // Block address taken -.LBB142_21: - hint #36 - //APP - dsb sy - //NO_APP - b .LBB142_11 -.Ltmp172: // Block address taken -.LBB142_22: - hint #36 - adrp x8, cpu_hwcaps - ldr x8, [x8, :lo12:cpu_hwcaps] - tbz w8, #4, .LBB142_13 -.Ltmp177: // Block address taken -.LBB142_23: - hint #36 - //APP -.Ltmp232: - nop -.Ltmp233: - .section .altinstructions,"a",@progbits -.Ltmp234: - .word .Ltmp232-.Ltmp234 -.Ltmp236: - .word .Ltmp235-.Ltmp236 - .hword 10 - .byte .Ltmp233-.Ltmp232 - .byte .Ltmp237-.Ltmp235 - .text - - .text 1 -.Ltmp235: - .inst 0xd500419f - -.Ltmp237: -.Ltmp238: -.org (.Ltmp238-(.Ltmp237-.Ltmp235))+(.Ltmp233-.Ltmp232), 0 -.Ltmp239: -.org (.Ltmp239-(.Ltmp233-.Ltmp232))+(.Ltmp237-.Ltmp235), 0 - .text - - - //NO_APP - b .LBB142_18 -.Ltmp219: // Block address taken -.LBB142_24: - hint #36 - //APP - dsb sy - //NO_APP - b .LBB142_18 -.Ltmp146: // Block address taken -.LBB142_25: - hint #36 - mov w11, #160 - b .LBB142_9 -.Ltmp198: // Block address taken -.LBB142_26: - hint #36 - mov w9, #160 - b .LBB142_16 -.Lfunc_end142: - .size _copy_to_user, .Lfunc_end142-_copy_to_user - // -- End function .type gFtlInitStatus,@object // @gFtlInitStatus .data .globl gFtlInitStatus @@ -21900,7 +20606,7 @@ gFtlInitStatus: .type .L.str.1,@object // @.str.1 .L.str.1: - .asciz "SFTL version: 5.0.58 20220814" + .asciz "SFTL version: 5.0.59 20221121" .size .L.str.1, 30 .type .L.str.2,@object // @.str.2 @@ -23996,9 +22702,4 @@ rk_sftl_vendor_storage_fops: .addrsig_sym 
gL2pMapInfo .addrsig_sym gVendorBlkInfo .addrsig_sym rkflash_vender_storage_dev - .addrsig_sym arm64_const_caps_ready - .addrsig_sym cpu_hwcap_keys - .addrsig_sym cpu_hwcaps - .addrsig_sym gic_nonsecure_priorities - .addrsig_sym gic_pmr_sync .addrsig_sym rk_sftl_vendor_storage_fops diff --git a/drivers/rkflash/rkflash_api.h b/drivers/rkflash/rkflash_api.h index 402109b60521..7ddc9eafe960 100644 --- a/drivers/rkflash/rkflash_api.h +++ b/drivers/rkflash/rkflash_api.h @@ -41,7 +41,7 @@ struct flash_boot_ops { int (*discard)(u32 sec, u32 n_sec); }; -#ifdef CONFIG_RK_NANDC_NAND +#if IS_REACHABLE(CONFIG_RK_NANDC_NAND) extern const struct flash_boot_ops nandc_nand_ops; #endif diff --git a/drivers/rkflash/sfc_nand_boot.c b/drivers/rkflash/sfc_nand_boot.c index 4c0bd84d7236..d31628a4e612 100644 --- a/drivers/rkflash/sfc_nand_boot.c +++ b/drivers/rkflash/sfc_nand_boot.c @@ -4,6 +4,7 @@ #include +#include "sfc_nand.h" #include "rkflash_api.h" #include "rk_sftl.h" @@ -17,10 +18,10 @@ static int snand_init(void __iomem *reg_addr) ret = sfc_nand_init(); if (ret == 0) { sfnand_dev = sfc_nand_get_private_dev(); -#if defined(CONFIG_RK_SFTL) +#if IS_REACHABLE(CONFIG_RK_SFTL) sfc_nand_ftl_ops_init(); ret = sftl_init(); -#elif !defined(CONFIG_RK_SFC_NAND_MTD) +#elif !IS_REACHABLE(CONFIG_RK_SFC_NAND_MTD) #error "When CONFIG_RK_SFC_NAND_MTD is not used, CONFIG_RK_SFTL is required!" 
#endif } From 959f662239cfa30d1d4f8e1ca1790be7a12cfe48 Mon Sep 17 00:00:00 2001 From: cww Date: Wed, 22 Mar 2023 10:26:41 +0800 Subject: [PATCH 68/79] ARM: dts: rockchip: rv1106-thunder-boot: reserved_mem add rkisp1_thunderboot Signed-off-by: cww Change-Id: I5b9dc2321efbf5f851a8a5afeba0e4d63d1cc554 --- arch/arm/boot/dts/rv1106-thunder-boot.dtsi | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/arm/boot/dts/rv1106-thunder-boot.dtsi b/arch/arm/boot/dts/rv1106-thunder-boot.dtsi index 410b1dd349f5..ea2557ce39c2 100644 --- a/arch/arm/boot/dts/rv1106-thunder-boot.dtsi +++ b/arch/arm/boot/dts/rv1106-thunder-boot.dtsi @@ -44,6 +44,10 @@ ramdisk_c: ramdisk_c { reg = <0x1cec000 (5 * 0x00100000)>; }; + + rkisp1_thunderboot: rkisp1_thunderboot { + /* vicap capture for other camera */ + }; }; thunder_boot_rkisp: thunder-boot-rkisp { From 379493488c7e5135bb406e9fa1b7a10e801073b9 Mon Sep 17 00:00:00 2001 From: cww Date: Sat, 11 Feb 2023 11:57:26 +0800 Subject: [PATCH 69/79] ARM: dts: rockchip: add rv1106g-evb2-v10-dual-camera.dts Signed-off-by: cww Change-Id: I7be6cb6a8a4cfccbf24344239b224cb51cdf0bae --- arch/arm/boot/dts/Makefile | 1 + .../boot/dts/rv1106g-evb2-v10-dual-camera.dts | 418 ++++++++++++++++++ 2 files changed, 419 insertions(+) create mode 100644 arch/arm/boot/dts/rv1106g-evb2-v10-dual-camera.dts diff --git a/arch/arm/boot/dts/Makefile b/arch/arm/boot/dts/Makefile index 898abab0e57f..d535eb9e01f9 100644 --- a/arch/arm/boot/dts/Makefile +++ b/arch/arm/boot/dts/Makefile @@ -998,6 +998,7 @@ dtb-$(CONFIG_ARCH_ROCKCHIP) += \ rv1106g-evb1-v10-spi-nand.dtb \ rv1106g-evb1-v10-spi-nor.dtb \ rv1106g-evb2-v10.dtb \ + rv1106g-evb2-v10-dual-camera.dtb \ rv1106g-evb2-v11-emmc.dtb \ rv1106g-smart-door-lock-rmsl-v10.dtb \ rv1106g-smart-door-lock-rmsl-v12.dtb \ diff --git a/arch/arm/boot/dts/rv1106g-evb2-v10-dual-camera.dts b/arch/arm/boot/dts/rv1106g-evb2-v10-dual-camera.dts new file mode 100644 index 000000000000..06cc1407fb6c --- /dev/null +++ 
b/arch/arm/boot/dts/rv1106g-evb2-v10-dual-camera.dts @@ -0,0 +1,418 @@ +// SPDX-License-Identifier: (GPL-2.0+ OR MIT) +/* + * Copyright (c) 2023 Rockchip Electronics Co., Ltd. + */ + +/dts-v1/; + +#include "rv1106.dtsi" +#include "rv1106-evb-v10.dtsi" +#include "rv1106-thunder-boot-spi-nor.dtsi" + +/ { + model = "Rockchip RV1106G EVB2 V10 Board With Dual Camera"; + compatible = "rockchip,rv1106g-evb2-v10-dual-camera", "rockchip,rv1106"; + + chosen { + bootargs = "loglevel=0 rootfstype=erofs rootflags=dax console=ttyFIQ0 root=/dev/rd0 snd_soc_core.prealloc_buffer_size_kbytes=16 coherent_pool=0 driver_async_probe=dwmmc_rockchip"; + }; + + vcc_1v8: vcc-1v8 { + compatible = "regulator-fixed"; + regulator-name = "vcc_1v8"; + regulator-always-on; + regulator-boot-on; + regulator-min-microvolt = <1800000>; + regulator-max-microvolt = <1800000>; + }; + + vcc_3v3: vcc-3v3 { + compatible = "regulator-fixed"; + regulator-name = "vcc_3v3"; + regulator-always-on; + regulator-boot-on; + regulator-min-microvolt = <3300000>; + regulator-max-microvolt = <3300000>; + }; + + vcc3v3_sd: vcc3v3-sd { + compatible = "regulator-fixed"; + gpio = <&gpio2 RK_PA7 GPIO_ACTIVE_LOW>; + regulator-name = "vcc3v3_sd"; + regulator-min-microvolt = <3300000>; + regulator-max-microvolt = <3300000>; + pinctrl-names = "default"; + pinctrl-0 = <&sdmmc_pwren>; + }; +}; + +&csi2_dphy_hw { + status = "okay"; +}; + +&csi2_dphy1 { + status = "okay"; + + ports { + #address-cells = <1>; + #size-cells = <0>; + + port@0 { + reg = <0>; + #address-cells = <1>; + #size-cells = <0>; + + csi_dphy_input0: endpoint@0 { + reg = <0>; + remote-endpoint = <&sc3338_30_out>; + data-lanes = <1 2>; + }; + }; + + port@1 { + reg = <1>; + #address-cells = <1>; + #size-cells = <0>; + + csi_dphy_output0: endpoint@0 { + reg = <0>; + remote-endpoint = <&mipi0_csi2_input>; + }; + }; + }; +}; + +&csi2_dphy2 { + status = "okay"; + + ports { + #address-cells = <1>; + #size-cells = <0>; + + port@0 { + reg = <0>; + #address-cells = <1>; + 
#size-cells = <0>; + + csi_dphy_input1: endpoint@0 { + reg = <0>; + remote-endpoint = <&sc3338_32_out>; + data-lanes = <1 2>; + }; + }; + + port@1 { + reg = <1>; + #address-cells = <1>; + #size-cells = <0>; + + csi_dphy_output1: endpoint@0 { + reg = <0>; + remote-endpoint = <&mipi1_csi2_input>; + }; + }; + }; +}; + +&emmc { + status = "disabled"; +}; + +&fiq_debugger { + rockchip,baudrate = <1500000>; + pinctrl-names = "default"; + pinctrl-0 = <&uart2m1_xfer>; +}; + +&i2c4 { + rockchip,amp-shared; + + sc3338_30: sc3338_30@30 { + compatible = "smartsens,sc3338"; + status = "okay"; + reg = <0x30>; + clocks = <&cru MCLK_REF_MIPI0>; + clock-names = "xvclk"; + pwdn-gpios = <&gpio3 RK_PC5 GPIO_ACTIVE_HIGH>; + pinctrl-names = "default"; + pinctrl-0 = <&mipi_refclk_out0>; + rockchip,camera-module-index = <0>; + rockchip,camera-module-facing = "back"; + rockchip,camera-module-name = "FKO1"; + rockchip,camera-module-lens-name = "30IRC-F16"; + port { + sc3338_30_out: endpoint { + remote-endpoint = <&csi_dphy_input0>; + data-lanes = <1 2>; + }; + }; + }; + + sc3338_32: sc3338_32@32 { + compatible = "smartsens,sc3338"; + status = "okay"; + reg = <0x32>; + clocks = <&cru MCLK_REF_MIPI1>; + clock-names = "xvclk"; + pwdn-gpios = <&gpio3 RK_PC5 GPIO_ACTIVE_HIGH>; + pinctrl-names = "default"; + pinctrl-0 = <&mipi_refclk_out1>; + rockchip,camera-module-index = <1>; + rockchip,camera-module-facing = "back"; + rockchip,camera-module-name = "FKO1"; + rockchip,camera-module-lens-name = "30IRC-F16"; + port { + sc3338_32_out: endpoint { + remote-endpoint = <&csi_dphy_input1>; + data-lanes = <1 2>; + }; + }; + }; +}; + +&mipi0_csi2 { + status = "okay"; + + ports { + #address-cells = <1>; + #size-cells = <0>; + + port@0 { + reg = <0>; + #address-cells = <1>; + #size-cells = <0>; + + mipi0_csi2_input: endpoint@1 { + reg = <1>; + remote-endpoint = <&csi_dphy_output0>; + }; + }; + + port@1 { + reg = <1>; + #address-cells = <1>; + #size-cells = <0>; + + mipi0_csi2_output: endpoint@0 { + reg = 
<0>; + remote-endpoint = <&cif_mipi0_in>; + }; + }; + }; +}; + +&mipi1_csi2 { + status = "okay"; + + ports { + #address-cells = <1>; + #size-cells = <0>; + + port@0 { + reg = <0>; + #address-cells = <1>; + #size-cells = <0>; + + mipi1_csi2_input: endpoint@1 { + reg = <1>; + remote-endpoint = <&csi_dphy_output1>; + }; + }; + + port@1 { + reg = <1>; + #address-cells = <1>; + #size-cells = <0>; + + mipi1_csi2_output: endpoint@0 { + reg = <0>; + remote-endpoint = <&cif_mipi1_in>; + }; + }; + }; +}; + +&mailbox { + status = "okay"; +}; + +&rkcif { + status = "okay"; + pinctrl-names = "default"; + pinctrl-0 = <&mipi_pins>; +}; + +&rkcif_mipi_lvds { + status = "okay"; + memory-region-thunderboot = <&rkisp_thunderboot>; + + port { + /* MIPI CSI-2 endpoint */ + cif_mipi0_in: endpoint { + remote-endpoint = <&mipi0_csi2_output>; + }; + }; +}; + +&rkcif_mipi_lvds_sditf { + status = "okay"; + + port { + /* MIPI CSI-2 endpoint */ + mipi_lvds_sditf: endpoint { + remote-endpoint = <&isp_in>; + }; + }; +}; + +&rkcif_mipi_lvds1 { + status = "okay"; + memory-region-thunderboot = <&rkisp1_thunderboot>; + + port { + /* MIPI CSI-2 endpoint */ + cif_mipi1_in: endpoint { + remote-endpoint = <&mipi1_csi2_output>; + }; + }; +}; + +&rkcif_mipi_lvds1_sditf { + status = "okay"; + + port { + /* MIPI CSI-2 endpoint */ + mipi_lvds1_sditf: endpoint { + remote-endpoint = <&isp_in1>; + }; + }; +}; + +&rkisp { + status = "okay"; +}; + +&rkisp_vir0 { + status = "okay"; + memory-region-thunderboot = <&rkisp_thunderboot>; + + port@0 { + isp_in: endpoint { + remote-endpoint = <&mipi_lvds_sditf>; + }; + }; +}; + +&rkisp_vir1 { + status = "okay"; + memory-region-thunderboot = <&rkisp_thunderboot>; + + port@0 { + isp_in1: endpoint { + remote-endpoint = <&mipi_lvds1_sditf>; + }; + }; +}; + +&thunder_boot_service { + status = "okay"; +}; + +&meta{ + /* reg's offset MUST match with RTOS */ + reg = <0x00800000 0xb0000>; +}; + +&rkisp_thunderboot { + /* reg's offset MUST match with RTOS */ + /* + * vicap, 
capture raw10, ceil(w*10/8/256)*256*h *4(buf num) + * e.g. 2304x1296: 0xf30000 + * 0x008b0000 = (meta's reg offset) + (meta's reg size) + * = 0x00800000 + 0xb0000 + */ + reg = <0x008b0000 0xf30000>; +}; + +&ramdisk_r { + reg = <0x17e0000 (10 * 0x00100000)>; +}; + +&ramdisk_c { + reg = <0x21e0000 (5 * 0x00100000)>; +}; + +&rkisp1_thunderboot { + /* + * vicap, capture raw10, ceil(w*10/8/256)*256*h *4(buf num) + * e.g. 2304x1296: 0xf30000 + * 0x26e0000 = (ramdisk_c's reg offset) + (ramdisk_c's reg size) + * = 0x21e0000 + (5 * 0x00100000) + */ + reg = <0x26e0000 0xf30000>; +}; + +&pinctrl { + sdmmc { + /omit-if-no-ref/ + sdmmc_pwren: sdmmc-pwren { + rockchip,pins = <2 RK_PA7 RK_FUNC_GPIO &pcfg_pull_none>; + }; + }; +}; + +&pwm10 { + status = "okay"; +}; + +&pwm11 { + status = "okay"; +}; + +&sdio { + max-frequency = <50000000>; + bus-width = <1>; + cap-sd-highspeed; + cap-sdio-irq; + keep-power-in-suspend; + non-removable; + rockchip,default-sample-phase = <90>; + no-sd; + no-mmc; + supports-sdio; + pinctrl-names = "default"; + pinctrl-0 = <&sdmmc1m0_cmd &sdmmc1m0_clk &sdmmc1m0_bus4>; + status = "okay"; +}; + +&sdmmc { + max-frequency = <200000000>; + no-sdio; + no-mmc; + bus-width = <4>; + cap-mmc-highspeed; + cap-sd-highspeed; + disable-wp; + pinctrl-names = "default"; + pinctrl-0 = <&sdmmc0_clk &sdmmc0_cmd &sdmmc0_det &sdmmc0_bus4>; + vmmc-supply = <&vcc3v3_sd>; + status = "okay"; +}; + +&sfc { + assigned-clocks = <&cru SCLK_SFC>; + assigned-clock-rates = <125000000>; + status = "okay"; + + flash@0 { + compatible = "jedec,spi-nor"; + reg = <0>; + spi-max-frequency = <125000000>; + spi-rx-bus-width = <4>; + spi-tx-bus-width = <1>; + }; +}; + +&usbdrd_dwc3 { + dr_mode = "peripheral"; +}; From d031ad7267053c0ee41b6b799efc949732a8237c Mon Sep 17 00:00:00 2001 From: Wu Liangqing Date: Sat, 18 Mar 2023 02:23:35 +0000 Subject: [PATCH 70/79] arm64: dts: rockchip: adaptive rk3399-sapphire-excavator Change-Id: I3e7b80091775414c1d51eda1cb14c50c0e930fc2 Signed-off-by: Wu 
Liangqing --- .../rk3399-sapphire-excavator-edp.dtsi | 51 ++++++-- .../boot/dts/rockchip/rk3399-sapphire.dtsi | 117 ++++++++++++++++-- 2 files changed, 151 insertions(+), 17 deletions(-) diff --git a/arch/arm64/boot/dts/rockchip/rk3399-sapphire-excavator-edp.dtsi b/arch/arm64/boot/dts/rockchip/rk3399-sapphire-excavator-edp.dtsi index 67aab337f66f..89ff138a5794 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399-sapphire-excavator-edp.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3399-sapphire-excavator-edp.dtsi @@ -11,6 +11,46 @@ #include "rk3399-vop-clk-set.dtsi" / { + backlight: backlight { + compatible = "pwm-backlight"; + brightness-levels = < + 0 1 2 3 4 5 6 7 + 8 9 10 11 12 13 14 15 + 16 17 18 19 20 21 22 23 + 24 25 26 27 28 29 30 31 + 32 33 34 35 36 37 38 39 + 40 41 42 43 44 45 46 47 + 48 49 50 51 52 53 54 55 + 56 57 58 59 60 61 62 63 + 64 65 66 67 68 69 70 71 + 72 73 74 75 76 77 78 79 + 80 81 82 83 84 85 86 87 + 88 89 90 91 92 93 94 95 + 96 97 98 99 100 101 102 103 + 104 105 106 107 108 109 110 111 + 112 113 114 115 116 117 118 119 + 120 121 122 123 124 125 126 127 + 128 129 130 131 132 133 134 135 + 136 137 138 139 140 141 142 143 + 144 145 146 147 148 149 150 151 + 152 153 154 155 156 157 158 159 + 160 161 162 163 164 165 166 167 + 168 169 170 171 172 173 174 175 + 176 177 178 179 180 181 182 183 + 184 185 186 187 188 189 190 191 + 192 193 194 195 196 197 198 199 + 200 201 202 203 204 205 206 207 + 208 209 210 211 212 213 214 215 + 216 217 218 219 220 221 222 223 + 224 225 226 227 228 229 230 231 + 232 233 234 235 236 237 238 239 + 240 241 242 243 244 245 246 247 + 248 249 250 251 252 253 254 255>; + default-brightness-level = <200>; + pwms = <&pwm0 0 25000 0>; + enable-gpios = <&gpio4 29 GPIO_ACTIVE_HIGH>; + }; + vcc_lcd: vcc-lcd { compatible = "regulator-fixed"; regulator-name = "vcc_lcd"; @@ -20,7 +60,7 @@ regulator-min-microvolt = <3300000>; regulator-max-microvolt = <3300000>; regulator-boot-on; - vin-supply = <&vcc5v0_sys>; + vin-supply = <&vcc_sys>; }; 
panel: panel { @@ -70,10 +110,6 @@ }; }; -&backlight { - status = "okay"; - enable-gpios = <&gpio4 29 GPIO_ACTIVE_HIGH>; -}; &edp { status = "okay"; @@ -102,11 +138,6 @@ status = "okay"; }; -&cdn_dp { - status = "okay"; - extcon = <&fusb0>; - phys = <&tcphy0_dp>; -}; &hdmi_dp_sound { status = "okay"; diff --git a/arch/arm64/boot/dts/rockchip/rk3399-sapphire.dtsi b/arch/arm64/boot/dts/rockchip/rk3399-sapphire.dtsi index 3d9e27750139..dadafc3d399e 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399-sapphire.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3399-sapphire.dtsi @@ -3,6 +3,7 @@ * Copyright (c) 2017 Fuzhou Rockchip Electronics Co., Ltd. */ +#include "dt-bindings/usb/pd.h" #include "dt-bindings/pwm/pwm.h" #include "dt-bindings/input/input.h" #include "rk3399.dtsi" @@ -137,6 +138,11 @@ }; }; +&cdn_dp { + status = "okay"; + phys = <&tcphy0_dp>; +}; + &cpu_l0 { cpu-supply = <&vdd_cpu_l>; }; @@ -205,7 +211,6 @@ }; &hdmi { - ddc-i2c-bus = <&i2c3>; status = "okay"; }; @@ -437,10 +442,77 @@ }; }; -&i2c3 { - i2c-scl-rising-time-ns = <450>; - i2c-scl-falling-time-ns = <15>; +&i2c4 { status = "okay"; + i2c-scl-rising-time-ns = <475>; + i2c-scl-falling-time-ns = <26>; + + usbc0: fusb302@22 { + compatible = "fcs,fusb302"; + reg = <0x22>; + interrupt-parent = <&gpio1>; + interrupts = ; + pinctrl-names = "default"; + pinctrl-0 = <&usbc0_int>; + vbus-supply = <&vcc5v0_typec0>; + status = "okay"; + + ports { + #address-cells = <1>; + #size-cells = <0>; + + port@0 { + reg = <0>; + usbc0_role_sw: endpoint@0 { + remote-endpoint = <&dwc3_0_role_switch>; + }; + }; + }; + + usb_con: connector { + compatible = "usb-c-connector"; + label = "USB-C"; + data-role = "dual"; + power-role = "dual"; + try-power-role = "sink"; + op-sink-microwatt = <1000000>; + sink-pdos = + ; + source-pdos = + ; + + displayport = <&cdn_dp>; + + altmodes { + #address-cells = <1>; + #size-cells = <0>; + + altmode@0 { + reg = <0>; + svid = <0xff01>; + vdo = <0xffffffff>; + }; + }; + + ports { + #address-cells = <1>; 
+ #size-cells = <0>; + + port@0 { + reg = <0>; + usbc0_orien_sw: endpoint { + remote-endpoint = <&tcphy0_orientation_switch>; + }; + }; + port@1 { + reg = <1>; + dp_mode_sw: endpoint { + remote-endpoint = <&tcphy_dp_altmode_switch>; + }; + }; + }; + }; + }; }; &i2s2 { @@ -502,6 +574,13 @@ rockchip,pins = <4 RK_PD1 RK_FUNC_GPIO &pcfg_pull_none>; }; + }; + + usb-typec { + usbc0_int: usbc0-int { + rockchip,pins = <1 RK_PA2 RK_FUNC_GPIO &pcfg_pull_up>; + }; + vcc5v0_typec0_en: vcc5v0-typec0-en { rockchip,pins = <2 RK_PA0 RK_FUNC_GPIO &pcfg_pull_none>; @@ -549,6 +628,21 @@ &tcphy0 { status = "okay"; + svid = <0xff01>; + orientation-switch; + + port { + #address-cells = <1>; + #size-cells = <0>; + tcphy0_orientation_switch: endpoint@0 { + reg = <0>; + remote-endpoint = <&usbc0_orien_sw>; + }; + tcphy_dp_altmode_switch: endpoint@1 { + reg = <1>; + remote-endpoint = <&dp_mode_sw>; + }; + }; }; &tcphy1 { @@ -571,7 +665,7 @@ }; u2phy0_host: host-port { - phy-supply = <&vcc5v0_typec0>; + phy-supply = <&vcc5v0_host>; status = "okay"; }; }; @@ -621,7 +715,16 @@ &usbdrd_dwc3_0 { status = "okay"; - dr_mode = "host"; + dr_mode = "otg"; + usb-role-switch; + port { + #address-cells = <1>; + #size-cells = <0>; + dwc3_0_role_switch: endpoint@0 { + reg = <0>; + remote-endpoint = <&usbc0_role_sw>; + }; + }; }; &usbdrd3_1 { @@ -647,4 +750,4 @@ &vopl_mmu { status = "okay"; -}; +}; \ No newline at end of file From 48b91619b8d95223aad729547960ea3029344e23 Mon Sep 17 00:00:00 2001 From: Shawn Lin Date: Tue, 21 Mar 2023 17:41:14 +0800 Subject: [PATCH 71/79] PCI: rockchip: dw: Double check LTSSM We saw the link drop right after link-up had been reported, a case in which probing the device should actually fail. Reporting link-up there misled the PCIe stack into scanning the device. Add a double check of the link state after link-up; if the link has dropped, keep trying to link until the timeout expires. [ 2.373308] rk-pcie fe180000.pcie: PCIe Linking...
LTSSM is 0x3 [ 2.455521] rk-pcie fe180000.pcie: PCIe Link up, LTSSM is 0x2 [ 2.455658] rk-pcie fe180000.pcie: PCI host bridge to bus 0003:30 Signed-off-by: Shawn Lin Change-Id: I0f9bcbb42d77a80aa1cb533952427b71096240d6 --- drivers/pci/controller/dwc/pcie-dw-rockchip.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/drivers/pci/controller/dwc/pcie-dw-rockchip.c b/drivers/pci/controller/dwc/pcie-dw-rockchip.c index 3cd09125933b..3bcbc3a17d2f 100644 --- a/drivers/pci/controller/dwc/pcie-dw-rockchip.c +++ b/drivers/pci/controller/dwc/pcie-dw-rockchip.c @@ -815,10 +815,13 @@ static int rk_pcie_establish_link(struct dw_pcie *pci) * more for Gen switch. */ msleep(50); - dev_info(pci->dev, "PCIe Link up, LTSSM is 0x%x\n", - rk_pcie_readl_apb(rk_pcie, PCIE_CLIENT_LTSSM_STATUS)); - rk_pcie_debug_dump(rk_pcie); - return 0; + /* In case link drop after linkup, double check it */ + if (dw_pcie_link_up(pci)) { + dev_info(pci->dev, "PCIe Link up, LTSSM is 0x%x\n", + rk_pcie_readl_apb(rk_pcie, PCIE_CLIENT_LTSSM_STATUS)); + rk_pcie_debug_dump(rk_pcie); + return 0; + } } dev_info_ratelimited(pci->dev, "PCIe Linking... 
LTSSM is 0x%x\n", From 232b116165622c5fa63d475ee29a5a8a9ac5c67b Mon Sep 17 00:00:00 2001 From: Jianwei Fan Date: Wed, 22 Mar 2023 09:32:35 +0000 Subject: [PATCH 72/79] media: i2c: imx577: fix gain step Fixes: de9477630d9c ("media: i2c: imx577: add dgain control") Change-Id: Ie78a4330a8c38f605db2b07bc867f20cb3a29bb1 Signed-off-by: Jianwei Fan --- drivers/media/i2c/imx577.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/media/i2c/imx577.c b/drivers/media/i2c/imx577.c index 0d4eca4ef866..b1567c2341d0 100644 --- a/drivers/media/i2c/imx577.c +++ b/drivers/media/i2c/imx577.c @@ -70,7 +70,7 @@ #define IMX577_REG_GAIN_L 0x0205 #define IMX577_GAIN_MIN 0x10 #define IMX577_GAIN_MAX 0x1600 -#define IMX577_GAIN_STEP 0x10 +#define IMX577_GAIN_STEP 0x1 #define IMX577_GAIN_DEFAULT 0x20 #define IMX577_REG_DGAIN 0x3ff9 From e760c21d3744fc022337dcaf1fbea0af4a1b193b Mon Sep 17 00:00:00 2001 From: Zorro Liu Date: Fri, 17 Mar 2023 11:15:59 +0800 Subject: [PATCH 73/79] dt-bindings: soc: rockchip: add reboot mode quiescent Signed-off-by: Zorro Liu Change-Id: I249f7dc7b8f464e0f5dd69797124fba036126b55 --- include/dt-bindings/soc/rockchip,boot-mode.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/include/dt-bindings/soc/rockchip,boot-mode.h b/include/dt-bindings/soc/rockchip,boot-mode.h index 1436e1d32619..ec4e5dd83cc1 100644 --- a/include/dt-bindings/soc/rockchip,boot-mode.h +++ b/include/dt-bindings/soc/rockchip,boot-mode.h @@ -20,5 +20,7 @@ #define BOOT_CHARGING (REBOOT_FLAG + 11) /* enter usb mass storage mode */ #define BOOT_UMS (REBOOT_FLAG + 12) +/* reboot system quiescent */ +#define BOOT_QUIESCENT (REBOOT_FLAG + 14) #endif From c9e25627a9b123227a45ff09bf9c2895abe3ab4a Mon Sep 17 00:00:00 2001 From: Zorro Liu Date: Thu, 23 Mar 2023 01:18:06 +0000 Subject: [PATCH 74/79] arm64: dts: rockchip: rk3588s: add reboot mode quiescent Change-Id: I263760f6a4869210ec80eac4ae4437bb7762d625 Signed-off-by: Zorro Liu --- arch/arm64/boot/dts/rockchip/rk3588s.dtsi | 1 + 
1 file changed, 1 insertion(+) diff --git a/arch/arm64/boot/dts/rockchip/rk3588s.dtsi b/arch/arm64/boot/dts/rockchip/rk3588s.dtsi index 07fb66eaa8c0..08f3c216eb61 100644 --- a/arch/arm64/boot/dts/rockchip/rk3588s.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3588s.dtsi @@ -2135,6 +2135,7 @@ mode-ums = <BOOT_UMS>; mode-panic = <BOOT_PANIC>; mode-watchdog = <BOOT_WATCHDOG>; + mode-quiescent = <BOOT_QUIESCENT>; }; }; From 535756b9725c64c9cf99567d81006a008777c7d3 Mon Sep 17 00:00:00 2001 From: Zorro Liu Date: Tue, 21 Mar 2023 09:46:13 +0800 Subject: [PATCH 75/79] backlight: pwm_bl: set bl brightness 0 when reboot quiescent Signed-off-by: Zorro Liu Change-Id: Idf24ba57198dfb404d36507e207069c542924eea --- drivers/video/backlight/pwm_bl.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/drivers/video/backlight/pwm_bl.c b/drivers/video/backlight/pwm_bl.c index cd3676b96fb0..38ac5fb70b1d 100644 --- a/drivers/video/backlight/pwm_bl.c +++ b/drivers/video/backlight/pwm_bl.c @@ -19,6 +19,11 @@ #include #include +static bool bl_quiescent; +module_param_named(quiescent, bl_quiescent, bool, 0600); +MODULE_PARM_DESC(quiescent, + "pwm bl quiescent when reboot quiescent [default=false]"); + struct pwm_bl_data { struct pwm_device *pwm; struct device *dev; @@ -627,7 +632,12 @@ static int pwm_backlight_probe(struct platform_device *pdev) data->dft_brightness = data->max_brightness; } - bl->props.brightness = data->dft_brightness; + /* set brightness 0, when boot quiescent */ + if (bl_quiescent) + bl->props.brightness = 0; + else + bl->props.brightness = data->dft_brightness; + bl->props.power = pwm_backlight_initial_power_state(pb); backlight_update_status(bl); From 50c611443783101b1aa2dd9d29f8c474bd31d8cd Mon Sep 17 00:00:00 2001 From: Shiqin Chen Date: Thu, 23 Mar 2023 09:45:16 +0800 Subject: [PATCH 76/79] arm64: configs: rk3588_edge: Enable CONFIG_SENSORS_PWM_FAN Signed-off-by: Shiqin Chen Change-Id: I76ab7f583afe3f891d5680056b12bab24a6b0407 --- arch/arm64/configs/rk3588_edge.config | 1 + 1 file changed, 1 
insertion(+) diff --git a/arch/arm64/configs/rk3588_edge.config b/arch/arm64/configs/rk3588_edge.config index 8b07747a6020..8668adf9d5f6 100644 --- a/arch/arm64/configs/rk3588_edge.config +++ b/arch/arm64/configs/rk3588_edge.config @@ -11,3 +11,4 @@ CONFIG_USB_CONFIGFS_RNDIS=y CONFIG_USB_CONFIGFS_F_UAC1=y CONFIG_USB_CONFIGFS_F_UAC2=y CONFIG_USB_CONFIGFS_F_HID=y +CONFIG_SENSORS_PWM_FAN=y From 32c4d8949455b016ca14bd2cca76bbb5889ea74f Mon Sep 17 00:00:00 2001 From: Yiqing Zeng Date: Thu, 23 Mar 2023 17:29:18 +0800 Subject: [PATCH 77/79] ARM: dts: rockchip: rv1106-evb-cam: change os04a10 module name and lens name Signed-off-by: Yiqing Zeng Change-Id: I37f114e048f9e919f8fe02eee749124276e2b402 --- arch/arm/boot/dts/rv1106-evb-cam.dtsi | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm/boot/dts/rv1106-evb-cam.dtsi b/arch/arm/boot/dts/rv1106-evb-cam.dtsi index 5353ef9de50b..2e4967f10a66 100644 --- a/arch/arm/boot/dts/rv1106-evb-cam.dtsi +++ b/arch/arm/boot/dts/rv1106-evb-cam.dtsi @@ -144,8 +144,8 @@ pinctrl-0 = <&mipi_refclk_out0>; rockchip,camera-module-index = <0>; rockchip,camera-module-facing = "back"; - rockchip,camera-module-name = "408b170b9d8a"; - rockchip,camera-module-lens-name = "40IRC-F10"; + rockchip,camera-module-name = "CMK-OT1607-PV1"; + rockchip,camera-module-lens-name = "50IRC-F16"; port { os04a10_out: endpoint { remote-endpoint = <&csi_dphy_input3>; From f035db5f706b17a0d96aaaa0f02fe6ee423dcaae Mon Sep 17 00:00:00 2001 From: Cai YiWei Date: Thu, 23 Mar 2023 17:42:15 +0800 Subject: [PATCH 78/79] media: rockchip: isp: fix read BP_WR_CTRL reg Change-Id: I2d448dcd6db98dafeb00209cb9d876a5ee98c51c Signed-off-by: Cai YiWei --- drivers/media/platform/rockchip/isp/rkisp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/media/platform/rockchip/isp/rkisp.c b/drivers/media/platform/rockchip/isp/rkisp.c index 485f3346d35e..29c3287a50cd 100644 --- a/drivers/media/platform/rockchip/isp/rkisp.c +++ 
b/drivers/media/platform/rockchip/isp/rkisp.c @@ -782,7 +782,7 @@ run_next: else dev->irq_ends_mask &= ~ISP_FRAME_MPFBC; if ((dev->isp_ver == ISP_V30 && - rkisp_read(dev, ISP3X_BP_ENABLE, true) & ISP3X_BP_ENABLE) || + rkisp_read(dev, ISP3X_MI_BP_WR_CTRL, true) & ISP3X_BP_ENABLE) || (dev->isp_ver == ISP_V32 && rkisp_read(dev, ISP32_MI_WR_CTRL2_SHD, true) & ISP32_BP_EN_OUT_SHD)) dev->irq_ends_mask |= ISP_FRAME_BP; From d6fb546c185800776c1d001cd135c80ccb7cf51f Mon Sep 17 00:00:00 2001 From: Cai YiWei Date: Mon, 13 Mar 2023 10:59:34 +0800 Subject: [PATCH 79/79] media: rockchip: isp: version v2.2.0 Change-Id: I58699277e15b23c7eb8d4730c8d043164fb4746b Signed-off-by: Cai YiWei --- drivers/media/platform/rockchip/isp/version.h | 17 +++++++++++++++++ include/uapi/linux/rkisp2-config.h | 2 +- 2 files changed, 18 insertions(+), 1 deletion(-) diff --git a/drivers/media/platform/rockchip/isp/version.h b/drivers/media/platform/rockchip/isp/version.h index ff76f199011e..0fe7eb9e0051 100644 --- a/drivers/media/platform/rockchip/isp/version.h +++ b/drivers/media/platform/rockchip/isp/version.h @@ -404,6 +404,23 @@ * 43.fix first frame loss * 44.fix rgb range for selfpath * 45.stats buf add application params frame id + * + * v2.2.0 + * 1.add isp32 lite + * 2.add rk3562 config + * 3.add iqtool video for isp32 lite + * 4.fix build warn + * 5.dvfs for multi dev on/off + * 6.luma stream resolution alignment + * 7.fix sensor off to enable reset + * 8.fix isp and cif build warning + * 9.Return error code for ioctl set_meshbuf_size + * 10.fix isp32 lite mainpath switch fail + * 11.fix warning of vb2 cancel or done + * 12.fix isp no work due to irq_ends_mask error + * 13.thunder boot with multi sensor + * 14.support buf early done + * 15.fix read BP_WR_CTRL reg */ #define RKISP_DRIVER_VERSION RKISP_API_VERSION diff --git a/include/uapi/linux/rkisp2-config.h b/include/uapi/linux/rkisp2-config.h index efdcd2f4c326..0f5a9d1d5625 100644 --- a/include/uapi/linux/rkisp2-config.h +++ 
b/include/uapi/linux/rkisp2-config.h @@ -10,7 +10,7 @@ #include #include -#define RKISP_API_VERSION KERNEL_VERSION(2, 1, 0) +#define RKISP_API_VERSION KERNEL_VERSION(2, 2, 0) /****************ISP SUBDEV IOCTL*****************************/